# initialisation code copied on file GEO872-session-01-solution.rmd
# packages:
## Default repository
local({r <- getOption("repos")
r["CRAN"] <- "http://cran.r-project.org"
options(repos=r)
})
check_pkg <- function(x)
{
if (!require(x,character.only = TRUE, quietly = TRUE))
{
install.packages(x,dep=TRUE)
if(!require(x,character.only = TRUE, quietly = TRUE)) stop("Package not found")
}
}
# Call check_pkg() to install/load the required packages.
check_pkg("here")
check_pkg("sf")
check_pkg("readr")
check_pkg("ggplot2")
check_pkg("dplyr")
check_pkg("lubridate") #for timezones
check_pkg("raster")
check_pkg("leaflet")
check_pkg("maptools")
check_pkg("concaveman") # for concave hulls
# Plot
check_pkg("tidyverse")
check_pkg("hrbrthemes")
check_pkg("viridis")
check_pkg("tidyr")
check_pkg("forcats")
check_pkg("RColorBrewer")
check_pkg("extrafont")
check_pkg("ggcorrplot") #for correlation matrix
# Statistics
check_pkg("rstatix")
check_pkg("caret")
# Machine learning
check_pkg("caTools")
check_pkg("class")
check_pkg("forecast")
check_pkg("MLmetrics")
check_pkg("randomForest")
check_pkg("rpart")
#https://cran.r-project.org/web/packages/rmdwc/rmdwc.pdf
check_pkg("rmdwc")
#rmdcountAddin()
# DATA FOLDER
#On Martine laptop:
data_folder <- "D:/Documents/Master/UZH/cours/GEO880_Computational Movement Analysis/Project/FS23_880Project_G3/data"
#On Yelu laptop:
#data_folder <- "D:/GEO880/Project"
Our project aims at defining, describing and identifying movement patterns for different travel modes. Movement data has been collected over a period of approximately two months and a various range of travel modes have been recorded: train, tram, bus, car, walking, cycling, running, kick-scooter, ski, ski-lift and cable-car, boat and plane. A labeled dataset with travel modes will be extracted from the collected raw data by cleaning, filtering and segmentation.
The movement pattern for each travel mode will first be characterized by the following metrics: speed (acceleration, deceleration), sinuosity (angle, direction), and environmental factors related to the studied travel mode: proximity to railway tracks, tram lines, road network, walking path, slope, etc.
In a second step, the suitability of these characteristics for identifying travel modes will be studied. Limitation of data and methods will also be discussed. For example are the difference of the aforementioned chracteristics significant between two travel modes, and does our recorded data contain enough information.
Based on the answer, a model will then be attempted to build. We will apply it to collected data to detect different travel modes. The results of the identification will finally be evaluated with the labeled dataset.
We looked at papers
First the trajectories data will be prepared to be fed for machine learning: data cleaning, labelizing, values extracted.
To train a machine learning to classify the segments in the correct
categories, we first need to clean and prepare our data.
Firstly, the points with be manually labelized with the correct
transport modes. Secondly, the points will be segmentized based on time
gaps (error in recording or simply break in movement (night)) Then,
trajectory derivatives such as speed, sinuosity will be calculated.
Labels list: boat, train, etc.
Trajectories derivatives useful to determine mean of transport: for ex. mean speed, sinuosity (train, cable car, ski lift -> not much sinuosity). Walk, car, ski -> more sinuosity.
Here list the derivatives ?
We need Height for:—
We are working with two trajectory points datasets: one from posmo,
and one from a GPS tracker. Some inital cleanings and adaptations are
required to create an homogen trajectory dataset. In both datasets, the
columns were prepared in sort to have the basic attributes:
- source (posmo or GPS)
- datetime
- tmode_manual - E, N, H - geometry
Data cleaning and basic processing
The trajectory points downloaded from posmo are recorded in the
coordinate system WGS84. The points were then converted in the Swiss
system (MN95), and the coordinates extracted. Unfortunately, posmo
platform does not give the possibility to get the recorded altitude of
the points. To have an approximation of the height, the altitudes were
extracted thanks to the Digital Height Model DHM25 provided by
swisstopo. The DHM25 gives altitude every 25m. We estimated the
precision will be enough for our project. DHM25 is available as a
raster. To save calculation time in the project, this was first
preprocessed on QGIS to be converted in the new Swiss projection system
(MN95, previously MN03).
An issue with this method, is that this extraction gives the altitude
on the ground, and not of the user. This is of poor impact for
terrestrial movements, such as walks, trains, etc. as the differences is
not more than 5 meters. As we look at ranges (above ~ 1200m) or relative
derivatives (slope, speed), this is not too much influence.
However, this is more important for plane: the altitude of the user
cannot possibly be taken this way. Therefore, no altitude value were
assigned to plane segments.
# import and convert to sf object
posmo <- read_delim(here(data_folder, "posmo_2023-07-02.csv"), ",",show_col_types = FALSE) %>% st_as_sf(coords = c("lon_x", "lat_y"), crs=4326, remove = FALSE) |> st_transform(2056)
# Remove/rename columns
posmo <- posmo %>%
rename("tmode_posmo" = "transport_mode")
to_remove <- c("place_name", "lon_x", "lat_y", "user_id")
posmo <- posmo[ , !(names(posmo) %in% to_remove)]
# Add source
posmo$source <- "posmo"
# Add column for travel mode
posmo$tmode_manual <- NA
# add East and North columns
coords <- posmo |> st_coordinates()
posmo <- posmo |>
mutate(E = coords[,1], N = coords[,2])
# Filter points outside of Switzerland
ch_ll <- c(2460666, 1069416)
ch_ur <- c(2849000, 1300750)
posmo <- posmo %>%
filter((E > ch_ll[1] & E < ch_ur[1] & N > ch_ll[2] & N < ch_ur[2]))
# Update the timezone
posmo <- posmo %>%
mutate(
datetime = datetime %>% with_tz(tzone = "Europe/Zurich")
)
# Add heights
## Extract values from rasters:
dhm25 <- raster::raster(here(data_folder, "dhm25_raster", "dhm25_2056.tif"))
rasValue <- raster::extract(dhm25, posmo)
posmo$H <- rasValue
Labelizing data
#select boats segments
condition_boats <- posmo$datetime > as.POSIXct("2023-05-14 13:55:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-14 14:21:00", tz = "Europe/Zurich")
#select planes segments
condition_planes <- posmo$datetime > as.POSIXct("2023-06-17 11:12:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-17 12:22:00", tz = "Europe/Zurich")
#select cable car segments
condition_cable_cars <-
posmo$datetime > as.POSIXct("2023-04-12 09:12:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 09:18:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 09:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:10:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 12:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 12:27:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 09:18:10", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:24:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 13:11:45", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 13:20:00", tz = "Europe/Zurich")
# select ski lifts segments
condition_ski_lifts <-
posmo$datetime > as.POSIXct("2023-04-12 09:23:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 09:30:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 09:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 09:41:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 09:51:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 9:59:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 10:15:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 10:24:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 10:51:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 10:57:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 11:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 11:08:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 11:21:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 11:29:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 11:46:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 11:51:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 13:49:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 13:54:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 13:57:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 14:05:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 09:14:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:19:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 09:22:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:30:45", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 09:36:45", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:44:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 09:58:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 10:08:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 10:19:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 10:28:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 10:40:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 10:51:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 09:28:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:33:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 09:36:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:43:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 09:48:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:56:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 10:06:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 10:14:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 10:23:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 10:28:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 10:30:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 10:38:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 11:06:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 11:14:30", tz = "Europe/Zurich")
# select t-bar segments
condition_t_bars <-
posmo$datetime > as.POSIXct("2023-04-12 10:28:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 10:33:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 14:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 14:18:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 10:54:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 11:02:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 11:19:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 11:26:45", tz = "Europe/Zurich")
# select ski slopes segments
condition_skis <-
posmo$datetime > as.POSIXct("2023-04-12 09:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 09:33:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 09:41:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 9:51:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 09:59:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 10:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 10:24:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 10:28:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 10:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 10:51:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 10:57:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 11:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 11:08:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 11:21:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 11:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 11:46:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 11:51:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 13:49:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 13:54:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 13:57:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 14:05:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 14:10:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 14:18:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 14:48:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 09:12:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:14:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 09:19:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:22:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 09:30:45", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:36:45", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 09:44:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 09:58:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 10:08:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 10:17:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 10:28:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 10:40:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 10:51:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 10:54:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 11:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 11:25:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 15:40:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 15:59:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 09:24:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:28:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 09:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:36:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 09:43:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 9:48:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 09:56:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 10:06:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 10:14:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 10:23:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 10:28:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 10:30:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 10:38:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 11:06:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 11:14:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 11:19:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 11:26:45", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 11:42:30", tz = "Europe/Zurich")
#select kick scooters segments
condition_scooters <-
posmo$datetime > as.POSIXct("2023-05-03 19:53:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-03 19:56:15", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-03 21:47:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-03 21:56:15", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-04 07:36:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-04 07:42:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-24 22:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-24 22:10:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-24 19:48:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-24 20:00:00", tz = "Europe/Zurich")
#select bikes segments
condition_bikes <-
posmo$datetime > as.POSIXct("2023-05-17 07:43:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-17 07:57:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-17 18:01:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-17 18:12:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-23 07:52:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-23 08:07:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-23 17:11:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-23 18:14:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-24 07:59:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-24 08:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-24 17:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-24 17:50:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-25 07:43:20", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-25 07:58:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-03 08:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-03 08:51:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-03 17:39:15", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-03 17:51:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-02 08:31:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-02 08:36:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-02 11:49:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-02 12:04:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-02 17:44:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-02 18:04:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-15 15:57:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-15 16:32:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-01 07:37:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-01 07:53:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-01 18:38:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-01 18:52:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-31 07:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 08:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-31 17:32:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 17:50:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-31 18:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 18:04:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-31 18:09:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 18:35:00", tz = "Europe/Zurich")
#select train segments
condition_trains <-
posmo$datetime > as.POSIXct("2023-05-17 19:40:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-17 22:15:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 07:35:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 07:57:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-28 15:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-28 15:10:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-28 15:31:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-28 15:34:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-29 22:38:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-29 22:55:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-02 08:53:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-02 08:59:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-05 15:44:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-05 15:59:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-05 22:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-05 22:44:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-05 23:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-05 23:42:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-12 07:05:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-12 07:40:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-28 17:49:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-28 18:28:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 06:51:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 06:54:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 07:12:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 07:31:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 12:48:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 13:13:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 18:24:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 19:52:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-22 14:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-22 14:58:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-26 18:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-26 19:18:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-09 8:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-09 10:55:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-28 12:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-28 12:50:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-09 19:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 21:30:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-09 12:23:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 12:48:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-11 02:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-11 02:45:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-12 07:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-12 07:53:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-14 17:25:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-14 17:42:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-06 09:05:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 11:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-15 08:06:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-15 10:02:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-16 18:48:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-16 19:11:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-18 12:45:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-18 16:33:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-02 07:04:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-02 07:43:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-02 15:27:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-02 17:53:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-03-31 16:23:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-03-31 16:47:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-03 21:32:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-04 00:22:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-06 17:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-06 17:57:00", tz = "Europe/Zurich")
#select trams segments
condition_trams <-
posmo$datetime > as.POSIXct("2023-05-22 07:57:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 08:03:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 10:15:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 10:20:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 12:03:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 12:08:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-26 16:18:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-26 16:32:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-04 16:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-04 16:05:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-08 18:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-08 18:58:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-08 22:04:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-08 22:10:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-09 11:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-09 11:25:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-13 15:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-13 15:17:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-14 13:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-14 13:20:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-14 14:41:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-14 14:52:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-01 18:52:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-01 18:58:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-18 08:39:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-18 08:47:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-19 08:41:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-19 08:46:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-22 15:21:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-22 15:40:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-16 20:01:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-16 20:06:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-09 17:17:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 17:22:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-09 13:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 13:54:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-06 11:09:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 11:23:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-05 18:49:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-05 19:03:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-05 23:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-05 23:40:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-25 15:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-25 15:57:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-15 10:07:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-15 10:19:45", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-15 18:36:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-15 18:56:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-04 23:05:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-04 23:16:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-04 16:51:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-04 17:01:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-04 19:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-04 19:41:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-05 08:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-05 08:52:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-06 07:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-06 07:50:00", tz = "Europe/Zurich")
#select bus segments
condition_bus <-
posmo$datetime > as.POSIXct("2023-05-17 19:29:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-17 19:35:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-19 18:55:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-19 19:01:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-19 23:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-19 23:04:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 12:24:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 12:33:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 19:11:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 19:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 19:15:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 19:17:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 21:53:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 21:57:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-26 14:52:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-26 15:07:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-26 19:22:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-26 19:29:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-26 19:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-26 19:39:20", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-28 14:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-28 14:55:40", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-29 23:13:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-29 23:17:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-10 18:28:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-10 18:33:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-02 18:27:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-02 18:44:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-02 23:19:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-02 23:30:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-03 19:30:15", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-03 19:53:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-03 22:28:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-03 22:32:15", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-04 07:42:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-04 07:53:15", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-04 16:15:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-04 16:30:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-05 06:55:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-05 06:53:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-05 07:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-05 07:50:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-08 18:39:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-08 18:47:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-08 22:15:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-08 22:23:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-10 16:07:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-10 16:12:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-10 18:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-10 18:41:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-10 22:03:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-10 22:11:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-11 13:24:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-11 13:29:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-11 14:36:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-11 14:53:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-12 06:56:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-12 07:02:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-12 07:46:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-12 07:52:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-12 18:05:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-12 18:07:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-12 19:37:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-12 19:40:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-12 21:23:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-12 21:27:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-13 22:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-13 22:52:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-14 12:47:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-14 13:02:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-14 17:35:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-14 17:38:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-20 07:36:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-20 07:50:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-20 14:48:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-20 15:05:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-20 18:18:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-20 18:35:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-24 10:42:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-24 10:49:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-24 12:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-24 12:53:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-26 16:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-26 17:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-28 13:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-28 13:44:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-28 15:38:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-28 15:42:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-28 12:06:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-28 12:13:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-17 10:03:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-17 10:07:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-17 16:04:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-17 16:21:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-17 18:26:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-17 18:44:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-18 08:32:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-18 08:37:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-18 17:17:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-18 17:40:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-18 19:35:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-18 19:51:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-19 08:32:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-19 08:39:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-19 18:34:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-19 18:43:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-19 19:34:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-19 19:49:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-19 22:01:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-19 22:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 06:44:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 06:48:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 07:35:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 07:40:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 15:19:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 15:36:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 16:51:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 16:57:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 19:53:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 19:59:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-22 14:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-22 14:25:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-22 18:16:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-22 18:45:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-16 20:09:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-16 20:14:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-16 11:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-16 11:31:40", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-09 17:25:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 17:31:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-24 19:32:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-24 19:49:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-24 22:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-24 22:22:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-07 07:53:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-07 08:12:00", tz = "Europe/Zurich") | posmo$datetime > as.POSIXct("2023-06-07 08:35:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-07 08:45:00", tz = "Europe/Zurich") | posmo$datetime > as.POSIXct("2023-06-07 19:23:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-07 19:37:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-06 20:39:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 20:44:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-06 21:03:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 21:17:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-06 08:15:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 08:25:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-06 08:28:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 08:35:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-05 18:37:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-05 18:46:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-25 11:36:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-25 11:57:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-25 16:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-25 16:39:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-17 16:07:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-17 16:19:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-18 12:12:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-18 12:16:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-18 12:23:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-18 12:29:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-04 23:17:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-04 23:23:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-02 06:59:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-02 07:04:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-02 07:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-02 07:50:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-01 22:17:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-01 22:30:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-31 21:57:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 22:14:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-03-31 16:13:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-03-31 16:20:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-04 19:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-04 19:59:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-06 07:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-06 07:33:00", tz = "Europe/Zurich")
#select walking segments
condition_walks <-
posmo$datetime > as.POSIXct("2023-05-18 11:54:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-18 11:57:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-18 22:06:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-18 22:08:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-19 18:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-19 18:53:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-19 19:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-19 20:01:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-19 23:04:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-19 23:08:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-21 14:05:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-21 18:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 09:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 10:02:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 10:11:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 10:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 11:56:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 11:59:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 12:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 12:12:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 12:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 12:36:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-25 20:48:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-25 20:59:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-26 15:07:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-26 15:20:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-26 19:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-26 19:32:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-26 19:39:20", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-26 19:43:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-28 14:55:40", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-28 14:59:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-28 15:34:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-28 21:12:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-10 18:24:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-10 18:27:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-10 18:33:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-10 18:59:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-10 19:12:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-10 20:06:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-05 07:50:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-05 08:50:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-05 15:19:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-05 15:44:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-05 18:20:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-05 21:59:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-06 19:01:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-06 19:40:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-07 03:06:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-07 03:14:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-08 18:35:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-08 18:39:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-08 18:47:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-08 18:50:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-08 18:58:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-08 19:10:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-08 22:23:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-08 22:35:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-09 11:25:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-09 11:33:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-11 07:52:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-11 07:57:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-12 07:52:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-12 07:56:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-12 16:40:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-12 16:44:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-12 16:44:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-12 16:50:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-12 17:31:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-12 18:05:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-14 13:20:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-14 13:40:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-14 14:36:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-14 14:40:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-14 14:52:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-14 16:31:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-20 07:50:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-20 08:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-20 14:39:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-20 14:46:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-23 12:22:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-23 14:35:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-24 10:36:20", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-24 10:39:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-24 11:15:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-24 11:19:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-24 12:34:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-24 12:37:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-24 12:53:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-24 12:58:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-24 13:41:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-24 14:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-28 12:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-28 12:06:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-01 08:02:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-01 08:14:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-18 08:47:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-18 08:55:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-18 17:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-18 17:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-18 17:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-18 17:48:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-18 17:55:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-18 18:05:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-19 08:47:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-19 08:55:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-19 18:27:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-19 18:33:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-19 19:49:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-19 19:55:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-19 21:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-19 21:59:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 07:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 07:48:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 12:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 12:47:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 13:13:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 13:30:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 13:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 14:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 19:59:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 20:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-21 22:48:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-21 22:58:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-22 18:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-22 18:16:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-22 18:46:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-22 19:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-16 11:31:40", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-16 11:40:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-16 15:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-16 15:37:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-16 15:44:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-16 15:56:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-13 13:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-13 15:01:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-13 16:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-13 22:01:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-13 22:04:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-13 22:29:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-20 11:48:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-20 22:35:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 10:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 11:42:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-29 16:18:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-29 16:46:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-09 11:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 12:20:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-11 02:46:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-11 03:04:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-12 07:53:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-12 08:07:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-06 08:11:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-06 08:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-05 23:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-05 23:57:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 09:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 09:12:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-12 18:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-12 18:50:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-25 11:57:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-25 12:05:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-25 15:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-25 15:50:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 08:43:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 08:46:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-13 18:05:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-13 18:15:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 13:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 13:25:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-15 10:19:45", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-15 10:24:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-15 18:56:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-15 19:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-16 18:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-16 18:45:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-16 21:41:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-16 22:23:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-17 15:59:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-17 16:04:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-17 16:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-17 16:28:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-17 22:31:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-17 22:45:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-18 12:16:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-18 12:19:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-18 12:29:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-18 12:33:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-04 17:59:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-04 18:12:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-04 22:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-04 23:05:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-02 15:04:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-02 15:23:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-31 17:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 18:02:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-31 18:04:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 18:09:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-31 19:47:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 20:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-31 21:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-31 21:55:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-10 18:29:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-10 18:45:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-10 22:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-10 22:18:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-11 11:51:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-11 11:56:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-11 15:28:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-11 15:36:00", tz = "Europe/Zurich")
#select car segments
condition_cars <-
posmo$datetime > as.POSIXct("2023-05-17 22:23:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-17 22:38:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-19 23:08:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-19 23:41:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-20 11:18:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-20 11:48:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-20 22:35:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-20 22:51:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-21 13:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-21 13:10:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-21 13:15:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-21 14:05:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-21 18:01:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-21 18:20:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-21 20:01:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-21 20:37:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-22 07:25:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-22 07:27:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-27 09:20:20", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-27 09:42:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-27 11:52:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-27 12:10:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-28 04:19:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-28 05:10:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-28 21:51:40", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-28 22:59:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-29 15:28:59", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-29 16:18:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-29 17:28:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-29 18:18:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-23 20:09:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-23 20:42:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-28 18:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-28 18:48:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-30 21:56:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-30 22:28:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-05-01 07:24:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-05-01 07:28:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-17 07:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-17 07:57:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-22 03:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-22 04:05:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-11 15:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-11 17:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-24 14:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-24 17:30:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-10 13:40:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-10 17:33:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-09 21:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-09 22:30:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-14 07:10:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-14 08:00:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-14 17:44:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-14 18:10:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-13 18:00:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-13 19:15:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-15 14:25:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-15 21:41:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-15 07:20:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-15 08:04:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-16 23:06:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-16 23:40:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-17 14:34:30", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-17 14:42:30", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-04 17:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-04 17:59:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-03 21:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-03 22:18:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-06-02 18:02:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-06-02 18:43:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-06 21:30:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-06 22:37:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-09 09:50:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-09 12:05:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-10 08:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-10 09:39:00", tz = "Europe/Zurich") |
posmo$datetime > as.POSIXct("2023-04-10 18:45:00", tz = "Europe/Zurich") & posmo$datetime < as.POSIXct("2023-04-10 21:10:00", tz = "Europe/Zurich")
#final labelizing
posmo <- posmo %>%
mutate(tmode_manual = case_when(condition_walks ~ 'walk',
condition_boats ~ 'boat',
condition_planes ~ 'plane',
condition_trains ~ 'train',
condition_bikes ~ 'bike',
condition_scooters ~ 'kick-scooter',
condition_cars ~ 'car',
condition_bus ~ 'bus',
condition_trams ~ 'tram',
condition_t_bars ~ 't_bar',
condition_cable_cars ~ 'cable_car',
condition_ski_lifts ~ 'ski_lift',
condition_skis ~ 'ski',
is.na(posmo$tmode_manual) == TRUE ~ 'unclassified'
))
# Remove heights for plane (bc completely false):
posmo$H[posmo$tmode_manual == "plane"] <- 0.0
# select columns
posmo <- dplyr::select(posmo, c("source", "datetime", "tmode_manual", "E", "N", "H", "geometry"))
In the posmo dataset, have been recorded the following transport modes, ordered from the highest number of points (n) to the lowest:
#count of of points per means of transports:
posmo_tmode_count <- posmo %>% group_by(tmode_manual) %>% count() %>% arrange(desc(n))
knitr::kable(posmo_tmode_count %>% st_drop_geometry(),
caption = "Number of points labelized per mode of transport - posmo")
| tmode_manual | n |
|---|---|
| unclassified | 12232 |
| car | 9422 |
| walk | 8575 |
| bus | 5080 |
| train | 5075 |
| bike | 2306 |
| tram | 1964 |
| ski | 1795 |
| ski_lift | 1148 |
| plane | 465 |
| cable_car | 225 |
| t_bar | 183 |
| kick-scooter | 149 |
| boat | 85 |
Data cleaning and basic processing
The trajectory points recorded on GPS are recorded in the coordinate
system WGS84. The points were then converted in the Swiss system (MN95),
and the coordinates extracted. And the time recorded was converted to
the time zone ‘Europe/Zurich’. No further preparation were required.
## Load the data
gps_data_raw <- read_delim(here(data_folder, "yelu_dataset_all.csv"), ";")
## Use right coordinate system and preserve original E/N columns
gps_data_raw <- st_as_sf(gps_data_raw, coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE)
gps_data <- gps_data_raw %>% st_transform(crs = 2056)
gps_data$Datetime <- as.POSIXct(paste(as.Date(gps_data$Date, format = '%d.%m.%Y'), gps_data$Time), format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
gps_data <- gps_data %>% mutate(Datetime = Datetime %>% with_tz(tzone = "Europe/Zurich"))
gps_data$source <- "gps"
# Extract E and N values
coords <- gps_data |> st_coordinates()
gps_data <- gps_data |>
mutate(Latitude = coords[,2], Longitude = coords[,1])
# Rename columns
colnames(gps_data)[11] <- "datetime"
colnames(gps_data)[3] <- "N"
colnames(gps_data)[4] <- "E"
colnames(gps_data)[8] <- "H"
# Add column for travel mode
gps_data$tmode_manual <- NA
gps_data <- dplyr::select(gps_data, c("source", "datetime", "N", "E", "H", "geometry", "tmode_manual"))
Labelizing data
# Conditions for different travel modes
condition_bus <-
gps_data$datetime >= as.POSIXct("2023-04-21 16:18:35") & gps_data$datetime <= as.POSIXct("2023-04-21 16:23:40") |
gps_data$datetime >= as.POSIXct("2023-04-30 18:04:25") & gps_data$datetime <= as.POSIXct("2023-04-30 18:20:15") |
gps_data$datetime >= as.POSIXct("2023-04-30 19:46:20") & gps_data$datetime <= as.POSIXct("2023-04-30 19:59:25") |
gps_data$datetime >= as.POSIXct("2023-05-10 19:31:25") & gps_data$datetime <= as.POSIXct("2023-05-10 19:34:55") |
gps_data$datetime >= as.POSIXct("2023-05-10 22:43:00") & gps_data$datetime <= as.POSIXct("2023-05-10 22:56:05") |
gps_data$datetime >= as.POSIXct("2023-05-12 07:46:00") & gps_data$datetime <= as.POSIXct("2023-05-12 07:51:20") |
gps_data$datetime >= as.POSIXct("2023-05-26 07:45:20") & gps_data$datetime <= as.POSIXct("2023-05-26 07:50:25") |
gps_data$datetime >= as.POSIXct("2023-06-05 14:59:35") & gps_data$datetime <= as.POSIXct("2023-06-05 15:09:00") |
gps_data$datetime >= as.POSIXct("2023-06-05 15:17:55") & gps_data$datetime <= as.POSIXct("2023-06-05 15:27:40")
condition_tram <-
gps_data$datetime >= as.POSIXct("2023-04-21 17:18:55") & gps_data$datetime <= as.POSIXct("2023-04-21 17:26:05") |
gps_data$datetime >= as.POSIXct("2023-04-25 15:07:50") & gps_data$datetime <= as.POSIXct("2023-04-25 15:11:55") |
gps_data$datetime >= as.POSIXct("2023-04-25 17:40:50") & gps_data$datetime <= as.POSIXct("2023-04-25 17:47:20") |
gps_data$datetime >= as.POSIXct("2023-04-25 18:57:55") & gps_data$datetime <= as.POSIXct("2023-04-25 19:04:12") |
gps_data$datetime >= as.POSIXct("2023-04-25 19:50:45") & gps_data$datetime <= as.POSIXct("2023-04-25 19:54:25") |
gps_data$datetime >= as.POSIXct("2023-04-25 20:26:10") & gps_data$datetime <= as.POSIXct("2023-04-25 20:32:05") |
gps_data$datetime >= as.POSIXct("2023-04-29 18:06:45") & gps_data$datetime <= as.POSIXct("2023-04-29 18:12:20") |
gps_data$datetime >= as.POSIXct("2023-04-30 16:32:55") & gps_data$datetime <= as.POSIXct("2023-04-30 16:34:50") |
gps_data$datetime >= as.POSIXct("2023-04-30 16:45:05") & gps_data$datetime <= as.POSIXct("2023-04-30 16:56:30") |
gps_data$datetime >= as.POSIXct("2023-04-30 17:46:25") & gps_data$datetime <= as.POSIXct("2023-04-30 18:00:00") |
gps_data$datetime >= as.POSIXct("2023-04-30 20:10:35") & gps_data$datetime <= as.POSIXct("2023-04-30 20:12:05") |
gps_data$datetime >= as.POSIXct("2023-05-01 12:56:05") & gps_data$datetime <= as.POSIXct("2023-05-01 13:04:10") |
gps_data$datetime >= as.POSIXct("2023-05-01 13:16:50") & gps_data$datetime <= as.POSIXct("2023-05-01 13:25:05") |
gps_data$datetime >= as.POSIXct("2023-05-01 13:25:55") & gps_data$datetime <= as.POSIXct("2023-05-01 13:29:40") |
gps_data$datetime >= as.POSIXct("2023-05-01 19:28:40") & gps_data$datetime <= as.POSIXct("2023-05-01 19:30:35") |
gps_data$datetime >= as.POSIXct("2023-05-06 13:42:15") & gps_data$datetime <= as.POSIXct("2023-05-06 13:54:15") |
gps_data$datetime >= as.POSIXct("2023-05-06 13:58:00") & gps_data$datetime <= as.POSIXct("2023-05-06 14:04:35") |
gps_data$datetime >= as.POSIXct("2023-05-06 15:51:20") & gps_data$datetime <= as.POSIXct("2023-05-06 15:59:40") |
gps_data$datetime >= as.POSIXct("2023-05-06 16:07:35") & gps_data$datetime <= as.POSIXct("2023-05-06 16:18:05") |
gps_data$datetime >= as.POSIXct("2023-05-06 16:27:00") & gps_data$datetime <= as.POSIXct("2023-05-06 16:33:40") |
gps_data$datetime >= as.POSIXct("2023-05-06 17:13:15") & gps_data$datetime <= as.POSIXct("2023-05-06 17:20:05") |
gps_data$datetime >= as.POSIXct("2023-05-07 17:32:15") & gps_data$datetime <= as.POSIXct("2023-05-07 17:34:00") |
gps_data$datetime >= as.POSIXct("2023-05-07 18:09:50") & gps_data$datetime <= as.POSIXct("2023-05-07 18:26:55") |
gps_data$datetime >= as.POSIXct("2023-05-07 18:43:35") & gps_data$datetime <= as.POSIXct("2023-05-07 18:46:25") |
gps_data$datetime >= as.POSIXct("2023-05-07 19:41:25") & gps_data$datetime <= as.POSIXct("2023-05-07 19:44:50") |
gps_data$datetime >= as.POSIXct("2023-05-07 19:47:25") & gps_data$datetime <= as.POSIXct("2023-05-07 20:09:15") |
gps_data$datetime >= as.POSIXct("2023-05-08 19:25:25") & gps_data$datetime <= as.POSIXct("2023-05-08 19:28:35") |
gps_data$datetime >= as.POSIXct("2023-05-08 19:49:45") & gps_data$datetime <= as.POSIXct("2023-05-08 19:53:25") |
gps_data$datetime >= as.POSIXct("2023-05-10 17:16:45") & gps_data$datetime <= as.POSIXct("2023-05-10 17:28:45") |
gps_data$datetime >= as.POSIXct("2023-05-10 17:52:15") & gps_data$datetime <= as.POSIXct("2023-05-10 17:53:15") |
gps_data$datetime >= as.POSIXct("2023-05-12 18:00:50") & gps_data$datetime <= as.POSIXct("2023-05-12 18:08:15") |
gps_data$datetime >= as.POSIXct("2023-05-13 17:39:25") & gps_data$datetime <= as.POSIXct("2023-05-13 17:41:25") |
gps_data$datetime >= as.POSIXct("2023-05-16 17:48:05") & gps_data$datetime <= as.POSIXct("2023-05-16 17:50:00") |
gps_data$datetime >= as.POSIXct("2023-05-17 14:27:15") & gps_data$datetime <= as.POSIXct("2023-05-17 14:35:00") |
gps_data$datetime >= as.POSIXct("2023-05-17 14:52:15") & gps_data$datetime <= as.POSIXct("2023-05-17 15:00:25") |
gps_data$datetime >= as.POSIXct("2023-05-17 15:22:40") & gps_data$datetime <= as.POSIXct("2023-05-17 15:30:10") |
gps_data$datetime >= as.POSIXct("2023-05-18 14:45:50") & gps_data$datetime <= as.POSIXct("2023-05-18 14:56:25") |
gps_data$datetime >= as.POSIXct("2023-05-18 16:00:10") & gps_data$datetime <= as.POSIXct("2023-05-18 16:07:10") |
gps_data$datetime >= as.POSIXct("2023-05-20 16:43:35") & gps_data$datetime <= as.POSIXct("2023-05-20 16:51:35") |
gps_data$datetime >= as.POSIXct("2023-05-20 17:00:30") & gps_data$datetime <= as.POSIXct("2023-05-20 17:07:45") |
gps_data$datetime >= as.POSIXct("2023-05-20 20:30:40") & gps_data$datetime <= as.POSIXct("2023-05-20 20:45:40") |
gps_data$datetime >= as.POSIXct("2023-05-21 19:10:00") & gps_data$datetime <= as.POSIXct("2023-05-21 19:14:15") |
gps_data$datetime >= as.POSIXct("2023-05-21 19:16:50") & gps_data$datetime <= as.POSIXct("2023-05-21 19:37:25") |
gps_data$datetime >= as.POSIXct("2023-05-21 20:25:00") & gps_data$datetime <= as.POSIXct("2023-05-21 20:29:10") |
gps_data$datetime >= as.POSIXct("2023-05-21 20:37:45") & gps_data$datetime <= as.POSIXct("2023-05-21 20:47:35") |
gps_data$datetime >= as.POSIXct("2023-05-23 17:36:35") & gps_data$datetime <= as.POSIXct("2023-05-23 17:37:15") |
gps_data$datetime >= as.POSIXct("2023-05-23 17:43:35") & gps_data$datetime <= as.POSIXct("2023-05-23 17:57:50") |
gps_data$datetime >= as.POSIXct("2023-05-23 19:19:35") & gps_data$datetime <= as.POSIXct("2023-05-23 19:34:45") |
gps_data$datetime >= as.POSIXct("2023-05-24 13:19:10") & gps_data$datetime <= as.POSIXct("2023-05-24 13:24:35") |
gps_data$datetime >= as.POSIXct("2023-05-24 17:39:00") & gps_data$datetime <= as.POSIXct("2023-05-24 17:55:35") |
gps_data$datetime >= as.POSIXct("2023-05-25 14:46:30") & gps_data$datetime <= as.POSIXct("2023-05-25 14:55:30") |
gps_data$datetime >= as.POSIXct("2023-05-25 15:19:20") & gps_data$datetime <= as.POSIXct("2023-05-25 15:28:35") |
gps_data$datetime >= as.POSIXct("2023-05-26 13:17:05") & gps_data$datetime <= as.POSIXct("2023-05-26 13:31:20") |
gps_data$datetime >= as.POSIXct("2023-05-26 13:53:00") & gps_data$datetime <= as.POSIXct("2023-05-26 14:10:00") |
gps_data$datetime >= as.POSIXct("2023-05-26 14:16:00") & gps_data$datetime <= as.POSIXct("2023-05-26 14:18:05") |
gps_data$datetime >= as.POSIXct("2023-05-28 14:52:20") & gps_data$datetime <= as.POSIXct("2023-05-28 15:04:05") |
gps_data$datetime >= as.POSIXct("2023-05-28 15:22:40") & gps_data$datetime <= as.POSIXct("2023-05-28 15:38:50") |
gps_data$datetime >= as.POSIXct("2023-05-28 20:20:00") & gps_data$datetime <= as.POSIXct("2023-05-28 20:37:00") |
gps_data$datetime >= as.POSIXct("2023-05-28 20:45:05") & gps_data$datetime <= as.POSIXct("2023-05-28 20:56:10") |
gps_data$datetime >= as.POSIXct("2023-05-29 17:40:05") & gps_data$datetime <= as.POSIXct("2023-05-29 17:59:50") |
gps_data$datetime >= as.POSIXct("2023-05-29 18:19:55") & gps_data$datetime <= as.POSIXct("2023-05-29 18:27:35") |
gps_data$datetime >= as.POSIXct("2023-05-29 18:28:35") & gps_data$datetime <= as.POSIXct("2023-05-29 18:36:00") |
gps_data$datetime >= as.POSIXct("2023-05-29 18:56:35") & gps_data$datetime <= as.POSIXct("2023-05-29 19:09:30") |
gps_data$datetime >= as.POSIXct("2023-05-30 17:31:10") & gps_data$datetime <= as.POSIXct("2023-05-30 17:42:50") |
gps_data$datetime >= as.POSIXct("2023-05-30 19:04:55") & gps_data$datetime <= as.POSIXct("2023-05-30 19:48:50") |
gps_data$datetime >= as.POSIXct("2023-05-31 17:08:25") & gps_data$datetime <= as.POSIXct("2023-05-31 17:09:40") |
gps_data$datetime >= as.POSIXct("2023-05-31 17:34:25") & gps_data$datetime <= as.POSIXct("2023-05-31 17:42:25") |
gps_data$datetime >= as.POSIXct("2023-06-02 13:29:10") & gps_data$datetime <= as.POSIXct("2023-06-02 13:30:45") |
gps_data$datetime >= as.POSIXct("2023-06-02 13:36:25") & gps_data$datetime <= as.POSIXct("2023-06-02 13:50:55") |
gps_data$datetime >= as.POSIXct("2023-06-02 14:55:10") & gps_data$datetime <= as.POSIXct("2023-06-02 15:13:10") |
gps_data$datetime >= as.POSIXct("2023-06-04 16:58:15") & gps_data$datetime <= as.POSIXct("2023-06-04 17:00:00") |
gps_data$datetime >= as.POSIXct("2023-06-04 17:09:20") & gps_data$datetime <= as.POSIXct("2023-06-04 17:14:10") |
gps_data$datetime >= as.POSIXct("2023-06-04 18:18:35") & gps_data$datetime <= as.POSIXct("2023-06-04 18:22:25") |
gps_data$datetime >= as.POSIXct("2023-06-04 18:48:20") & gps_data$datetime <= as.POSIXct("2023-06-04 19:03:35") |
gps_data$datetime >= as.POSIXct("2023-06-05 15:37:15") & gps_data$datetime <= as.POSIXct("2023-06-05 15:40:15") |
gps_data$datetime >= as.POSIXct("2023-06-09 13:51:50") & gps_data$datetime <= as.POSIXct("2023-06-09 13:53:45") |
gps_data$datetime >= as.POSIXct("2023-06-10 18:35:10") & gps_data$datetime <= as.POSIXct("2023-06-10 18:48:30") |
gps_data$datetime >= as.POSIXct("2023-06-10 20:48:30") & gps_data$datetime <= as.POSIXct("2023-06-10 21:01:00") |
gps_data$datetime >= as.POSIXct("2023-06-12 19:07:00") & gps_data$datetime <= as.POSIXct("2023-06-12 19:14:45") |
gps_data$datetime >= as.POSIXct("2023-06-12 19:43:20") & gps_data$datetime <= as.POSIXct("2023-06-12 19:45:40")
condition_walk <-
gps_data$datetime >= as.POSIXct("2023-04-21 16:13:40") & gps_data$datetime <= as.POSIXct("2023-04-21 16:16:25") |
gps_data$datetime >= as.POSIXct("2023-04-25 15:13:05") & gps_data$datetime <= as.POSIXct("2023-04-25 15:18:00") |
gps_data$datetime >= as.POSIXct("2023-04-25 15:37:00") & gps_data$datetime <= as.POSIXct("2023-04-25 15:40:15") |
gps_data$datetime >= as.POSIXct("2023-04-25 19:05:05") & gps_data$datetime <= as.POSIXct("2023-04-25 19:08:25") |
gps_data$datetime >= as.POSIXct("2023-04-25 20:32:45") & gps_data$datetime <= as.POSIXct("2023-04-25 20:35:05") |
gps_data$datetime >= as.POSIXct("2023-04-29 21:41:45") & gps_data$datetime <= as.POSIXct("2023-04-29 22:19:40") |
gps_data$datetime >= as.POSIXct("2023-04-30 16:25:55") & gps_data$datetime <= as.POSIXct("2023-04-30 16:29:00") |
gps_data$datetime >= as.POSIXct("2023-04-30 16:57:00") & gps_data$datetime <= as.POSIXct("2023-04-30 17:45:00") |
gps_data$datetime >= as.POSIXct("2023-04-30 18:20:25") & gps_data$datetime <= as.POSIXct("2023-04-30 19:39:00") |
gps_data$datetime >= as.POSIXct("2023-04-30 20:13:20") & gps_data$datetime <= as.POSIXct("2023-04-30 20:15:40") |
gps_data$datetime >= as.POSIXct("2023-05-01 13:31:50") & gps_data$datetime <= as.POSIXct("2023-05-01 19:22:20") |
gps_data$datetime >= as.POSIXct("2023-05-01 19:31:55") & gps_data$datetime <= as.POSIXct("2023-05-01 19:36:05") |
gps_data$datetime >= as.POSIXct("2023-05-06 14:06:10") & gps_data$datetime <= as.POSIXct("2023-05-06 14:09:40") |
gps_data$datetime >= as.POSIXct("2023-05-06 16:18:45") & gps_data$datetime <= as.POSIXct("2023-05-06 16:21:30") |
gps_data$datetime >= as.POSIXct("2023-05-06 16:23:35") & gps_data$datetime <= as.POSIXct("2023-05-06 16:26:25") |
gps_data$datetime >= as.POSIXct("2023-05-06 16:33:50") & gps_data$datetime <= as.POSIXct("2023-05-06 16:34:40") |
gps_data$datetime >= as.POSIXct("2023-05-06 17:29:25") & gps_data$datetime <= as.POSIXct("2023-05-06 17:32:15") |
gps_data$datetime >= as.POSIXct("2023-05-07 17:34:55") & gps_data$datetime <= as.POSIXct("2023-05-07 18:05:05") |
gps_data$datetime >= as.POSIXct("2023-05-07 18:30:50") & gps_data$datetime <= as.POSIXct("2023-05-07 18:42:30") |
gps_data$datetime >= as.POSIXct("2023-05-07 20:13:00") & gps_data$datetime <= as.POSIXct("2023-05-07 20:15:35") |
gps_data$datetime >= as.POSIXct("2023-05-08 18:56:05") & gps_data$datetime <= as.POSIXct("2023-05-08 19:21:30") |
gps_data$datetime >= as.POSIXct("2023-05-08 19:53:55") & gps_data$datetime <= as.POSIXct("2023-05-08 20:14:55") |
gps_data$datetime >= as.POSIXct("2023-05-10 17:10:10") & gps_data$datetime <= as.POSIXct("2023-05-10 17:14:10") |
gps_data$datetime >= as.POSIXct("2023-05-10 17:28:50") & gps_data$datetime <= as.POSIXct("2023-05-10 17:52:00") |
gps_data$datetime >= as.POSIXct("2023-05-10 18:11:15") & gps_data$datetime <= as.POSIXct("2023-05-10 18:14:00") |
gps_data$datetime >= as.POSIXct("2023-05-10 19:36:45") & gps_data$datetime <= as.POSIXct("2023-05-10 19:42:45") |
gps_data$datetime >= as.POSIXct("2023-05-10 22:56:30") & gps_data$datetime <= as.POSIXct("2023-05-10 23:04:50") |
gps_data$datetime >= as.POSIXct("2023-05-12 07:52:05") & gps_data$datetime <= as.POSIXct("2023-05-12 07:54:35") |
gps_data$datetime >= as.POSIXct("2023-05-12 17:28:25") & gps_data$datetime <= as.POSIXct("2023-05-12 17:30:45") |
gps_data$datetime >= as.POSIXct("2023-05-12 18:25:50") & gps_data$datetime <= as.POSIXct("2023-05-12 18:28:20") |
gps_data$datetime >= as.POSIXct("2023-05-13 17:19:15") & gps_data$datetime <= as.POSIXct("2023-05-13 17:39:00") |
gps_data$datetime >= as.POSIXct("2023-05-13 17:41:40") & gps_data$datetime <= as.POSIXct("2023-05-13 17:45:25") |
gps_data$datetime >= as.POSIXct("2023-05-16 17:44:00") & gps_data$datetime <= as.POSIXct("2023-05-16 17:47:00") |
gps_data$datetime >= as.POSIXct("2023-05-16 17:51:20") & gps_data$datetime <= as.POSIXct("2023-05-16 17:54:05") |
gps_data$datetime >= as.POSIXct("2023-05-17 15:44:15") & gps_data$datetime <= as.POSIXct("2023-05-17 15:48:05") |
gps_data$datetime >= as.POSIXct("2023-05-18 14:21:00") & gps_data$datetime <= as.POSIXct("2023-05-18 14:40:35") |
gps_data$datetime >= as.POSIXct("2023-05-18 14:57:00") & gps_data$datetime <= as.POSIXct("2023-05-18 15:56:00") |
gps_data$datetime >= as.POSIXct("2023-05-18 16:11:45") & gps_data$datetime <= as.POSIXct("2023-05-18 16:14:30") |
gps_data$datetime >= as.POSIXct("2023-05-20 16:39:20") & gps_data$datetime <= as.POSIXct("2023-05-20 16:41:00") |
gps_data$datetime >= as.POSIXct("2023-05-20 17:09:00") & gps_data$datetime <= as.POSIXct("2023-05-20 20:30:00") |
gps_data$datetime >= as.POSIXct("2023-05-20 20:49:40") & gps_data$datetime <= as.POSIXct("2023-05-20 20:53:35") |
gps_data$datetime >= as.POSIXct("2023-05-21 19:37:35") & gps_data$datetime <= as.POSIXct("2023-05-21 20:24:05") |
gps_data$datetime >= as.POSIXct("2023-05-21 20:48:00") & gps_data$datetime <= as.POSIXct("2023-05-21 20:52:20") |
gps_data$datetime >= as.POSIXct("2023-05-23 17:34:05") & gps_data$datetime <= as.POSIXct("2023-05-23 17:36:25") |
gps_data$datetime >= as.POSIXct("2023-05-23 17:58:45") & gps_data$datetime <= as.POSIXct("2023-05-23 19:16:00") |
gps_data$datetime >= as.POSIXct("2023-05-23 19:39:10") & gps_data$datetime <= as.POSIXct("2023-05-23 19:42:10") |
gps_data$datetime >= as.POSIXct("2023-05-24 09:12:05") & gps_data$datetime <= as.POSIXct("2023-05-24 09:17:20") |
gps_data$datetime >= as.POSIXct("2023-05-24 14:50:15") & gps_data$datetime <= as.POSIXct("2023-05-24 14:52:20") |
gps_data$datetime >= as.POSIXct("2023-05-24 15:42:05") & gps_data$datetime <= as.POSIXct("2023-05-24 16:38:35") |
gps_data$datetime >= as.POSIXct("2023-05-25 14:35:55") & gps_data$datetime <= as.POSIXct("2023-05-25 14:40:25") |
gps_data$datetime >= as.POSIXct("2023-05-25 14:55:35") & gps_data$datetime <= as.POSIXct("2023-05-25 14:57:05") |
gps_data$datetime >= as.POSIXct("2023-05-25 15:14:50") & gps_data$datetime <= as.POSIXct("2023-05-25 15:19:15") |
gps_data$datetime >= as.POSIXct("2023-05-25 15:29:00") & gps_data$datetime <= as.POSIXct("2023-05-25 15:35:35") |
gps_data$datetime >= as.POSIXct("2023-05-25 20:06:20") & gps_data$datetime <= as.POSIXct("2023-05-25 20:52:00") |
gps_data$datetime >= as.POSIXct("2023-05-26 07:51:15") & gps_data$datetime <= as.POSIXct("2023-05-26 07:54:15") |
gps_data$datetime >= as.POSIXct("2023-05-26 08:47:15") & gps_data$datetime <= as.POSIXct("2023-05-26 12:17:30") |
gps_data$datetime >= as.POSIXct("2023-05-26 13:31:40") & gps_data$datetime <= as.POSIXct("2023-05-26 13:49:00") |
gps_data$datetime >= as.POSIXct("2023-05-26 14:27:10") & gps_data$datetime <= as.POSIXct("2023-05-26 17:20:10") |
gps_data$datetime >= as.POSIXct("2023-05-28 15:39:00") & gps_data$datetime <= as.POSIXct("2023-05-28 20:19:00") |
gps_data$datetime >= as.POSIXct("2023-05-28 20:57:05") & gps_data$datetime <= as.POSIXct("2023-05-28 21:00:00") |
gps_data$datetime >= as.POSIXct("2023-05-29 17:05:05") & gps_data$datetime <= as.POSIXct("2023-05-29 17:40:00") |
gps_data$datetime >= as.POSIXct("2023-05-29 18:13:30") & gps_data$datetime <= as.POSIXct("2023-05-29 18:18:45") |
gps_data$datetime >= as.POSIXct("2023-05-30 11:33:25") & gps_data$datetime <= as.POSIXct("2023-05-30 11:37:15") |
gps_data$datetime >= as.POSIXct("2023-05-31 17:44:25") & gps_data$datetime <= as.POSIXct("2023-05-31 17:48:00") |
gps_data$datetime >= as.POSIXct("2023-06-02 13:23:45") & gps_data$datetime <= as.POSIXct("2023-06-02 13:28:00") |
gps_data$datetime >= as.POSIXct("2023-06-02 13:55:35") & gps_data$datetime <= as.POSIXct("2023-06-02 14:19:00") |
gps_data$datetime >= as.POSIXct("2023-06-02 14:39:30") & gps_data$datetime <= as.POSIXct("2023-06-02 14:47:05") |
gps_data$datetime >= as.POSIXct("2023-06-02 15:16:45") & gps_data$datetime <= as.POSIXct("2023-06-02 15:20:25") |
gps_data$datetime >= as.POSIXct("2023-06-04 17:00:50") & gps_data$datetime <= as.POSIXct("2023-06-04 17:06:30") |
gps_data$datetime >= as.POSIXct("2023-06-04 17:17:20") & gps_data$datetime <= as.POSIXct("2023-06-04 18:17:00") |
gps_data$datetime >= as.POSIXct("2023-06-04 19:04:45") & gps_data$datetime <= as.POSIXct("2023-06-04 19:08:00") |
gps_data$datetime >= as.POSIXct("2023-06-05 12:51:35") & gps_data$datetime <= as.POSIXct("2023-06-05 12:56:40") |
gps_data$datetime >= as.POSIXct("2023-06-05 14:49:00") & gps_data$datetime <= as.POSIXct("2023-06-05 14:55:25") |
gps_data$datetime >= as.POSIXct("2023-06-05 15:28:00") & gps_data$datetime <= as.POSIXct("2023-06-05 15:36:00") |
gps_data$datetime >= as.POSIXct("2023-06-05 15:42:40") & gps_data$datetime <= as.POSIXct("2023-06-05 15:46:05") |
gps_data$datetime >= as.POSIXct("2023-06-06 20:00:05") & gps_data$datetime <= as.POSIXct("2023-06-06 20:34:25") |
gps_data$datetime >= as.POSIXct("2023-06-08 20:55:10") & gps_data$datetime <= as.POSIXct("2023-06-08 21:22:10") |
gps_data$datetime >= as.POSIXct("2023-06-09 13:54:10") & gps_data$datetime <= as.POSIXct("2023-06-09 17:14:20") |
gps_data$datetime >= as.POSIXct("2023-06-10 18:50:20") & gps_data$datetime <= as.POSIXct("2023-06-10 20:45:00") |
gps_data$datetime >= as.POSIXct("2023-06-10 21:03:15") & gps_data$datetime <= as.POSIXct("2023-06-10 21:05:40") |
gps_data$datetime >= as.POSIXct("2023-06-12 21:29:35") & gps_data$datetime <= as.POSIXct("2023-06-12 22:11:45") |
gps_data$datetime >= as.POSIXct("2023-06-15 21:43:25") & gps_data$datetime <= as.POSIXct("2023-06-15 21:45:00") |
gps_data$datetime >= as.POSIXct("2023-06-15 21:52:10") & gps_data$datetime <= as.POSIXct("2023-06-15 21:55:05") |
gps_data$datetime >= as.POSIXct("2023-06-15 21:58:05") & gps_data$datetime <= as.POSIXct("2023-06-15 21:59:55") |
gps_data$datetime >= as.POSIXct("2023-06-15 22:02:00") & gps_data$datetime <= as.POSIXct("2023-06-15 22:07:15") |
gps_data$datetime >= as.POSIXct("2023-06-15 22:14:45") & gps_data$datetime <= as.POSIXct("2023-06-15 22:19:00") |
gps_data$datetime >= as.POSIXct("2023-06-15 22:25:25") & gps_data$datetime <= as.POSIXct("2023-06-15 22:28:20") |
gps_data$datetime >= as.POSIXct("2023-06-16 10:17:45") & gps_data$datetime <= as.POSIXct("2023-06-16 10:21:25")
condition_boat <-
gps_data$datetime >= as.POSIXct("2023-06-02 14:20:00") & gps_data$datetime <= as.POSIXct("2023-06-02 14:37:20")
condition_run <-
gps_data$datetime >= as.POSIXct("2023-05-21 20:24:10") & gps_data$datetime <= as.POSIXct("2023-05-21 20:24:55") |
gps_data$datetime >= as.POSIXct("2023-06-08 20:59:00") & gps_data$datetime <= as.POSIXct("2023-06-08 21:17:55") |
gps_data$datetime >= as.POSIXct("2023-06-15 21:45:10") & gps_data$datetime <= as.POSIXct("2023-06-15 21:52:00") |
gps_data$datetime >= as.POSIXct("2023-06-15 21:55:10") & gps_data$datetime <= as.POSIXct("2023-06-15 21:57:25") |
gps_data$datetime >= as.POSIXct("2023-06-15 22:00:00") & gps_data$datetime <= as.POSIXct("2023-06-15 22:01:55") |
gps_data$datetime >= as.POSIXct("2023-06-15 22:07:40") & gps_data$datetime <= as.POSIXct("2023-06-15 22:14:40") |
gps_data$datetime >= as.POSIXct("2023-06-15 22:19:45") & gps_data$datetime <= as.POSIXct("2023-06-15 22:20:55")
# Labelizing data
gps_data <- gps_data %>%
mutate(tmode_manual = case_when(condition_walk ~ 'walk',
condition_tram ~ 'tram',
condition_bus ~ 'bus',
condition_boat ~ 'boat',
condition_run ~ 'run',
is.na(gps_data$tmode_manual) == TRUE ~ 'unclassified'
))
In the GPS tracker dataset, have been recorded the following transport modes, ordered from the highest number of points (n) to the lowest:
#count of of points per means of transports:
gps_tmode_count <- gps_data %>% group_by(tmode_manual) %>% count() %>% arrange(desc(n))
knitr::kable(gps_tmode_count %>% st_drop_geometry(),
caption = "Number of points labelized per mode of transport - GPS tracker")
| tmode_manual | n |
|---|---|
| walk | 9746 |
| tram | 7542 |
| unclassified | 7343 |
| bus | 929 |
| run | 245 |
| boat | 203 |
Merge GPS tracker and Posmo datasets
#here merge with gps tracker
mvmt_data <- rbind(posmo, gps_data)
col_order <- c("source", "datetime", "tmode_manual" ,"E", "N", "H", "geometry")
mvmt_data <- mvmt_data[, col_order]
# add regions (for visualisation)
#Zurich
zh_ll <- c(2674900, 1244000)
zh_ur <- c(2692100, 1260300)
#Waadt
wt_ll <- c(2480000, 1127600)
wt_ur <- c(2575200, 1195100)
#wallis
ws_ll <- c(2538000, 1076875)
ws_ur <- c(2661500, 1153125)
# Import the datasets
ch_boundaries <- read_sf(here(data_folder, "boundaries", "CH_boundaries.shp")) %>% st_zm()
Kanton_boundaries <- read_sf(here(data_folder, "boundaries", "Kanton_boundaries.shp")) %>% st_zm()
waadt <- Kanton_boundaries %>% filter(KANTONSNUM == 22)
zh_commune <- read_sf(here(data_folder, "boundaries", "ZH_gemeinde.shp")) %>% st_zm()
mvmt_data$region <- NA
mvmt_data <- mvmt_data |>
mutate(
region = case_when(
E > zh_ll[1] & E < zh_ur[1] & N > zh_ll[2] & N < zh_ur[2] ~ 'Zurich',
E > wt_ll[1] & E < wt_ur[1] & N > wt_ll[2] & N < wt_ur[2] ~ 'Waadt',
E > ws_ll[1] & E < ws_ur[1] & N > ws_ll[2] & N < ws_ur[2] ~ 'Wallis'
)
)
Here is the final total of recorded points and the distribution within the means of transport:
#count of points:
tmode_count <- mvmt_data %>% group_by(tmode_manual) %>% count() %>% arrange(desc(n))
knitr::kable(tmode_count %>% st_drop_geometry(),
caption = "Number of points labelized per mode of transport")
| tmode_manual | n |
|---|---|
| unclassified | 19575 |
| walk | 18321 |
| tram | 9506 |
| car | 9422 |
| bus | 6009 |
| train | 5075 |
| bike | 2306 |
| ski | 1795 |
| ski_lift | 1148 |
| plane | 465 |
| boat | 288 |
| run | 245 |
| cable_car | 225 |
| t_bar | 183 |
| kick-scooter | 149 |
Map general overview - transport modes:
mvmt_data_classified <- mvmt_data %>% filter(tmode_manual != "unclassified")
ggplot() +
geom_sf(data = ch_boundaries) +
labs(title = "All recorded trajectory points") +
geom_point(data = mvmt_data_classified, aes(E, N, color = tmode_manual)) +
coord_sf(datum=2056) +
theme_bw()
Map local overview - Zurich:
mvmt_data_zh <- mvmt_data_classified %>% filter(region == "Zurich")
ggplot() +
geom_sf(data = zh_commune) +
labs(title = "Recorded trajectory points - Zurich city") +
geom_point(data = mvmt_data_zh, aes(E, N, color = tmode_manual)) +
coord_sf(datum=2056) +
theme_bw()
Map local overview - Waadt:
mvmt_data_waadt <- mvmt_data_classified %>% filter(region == "Waadt")
ggplot() +
geom_sf(data = waadt) +
labs(title = "Recorded trajectory points - Waadt kanton") +
geom_point(data = mvmt_data_waadt, aes(E, N, color = tmode_manual)) +
coord_sf(datum=2056) +
theme_bw()
The points will be segmentized based on the user, gaps in recording and the labelized transport mode. The static points will be detected and removed from the dataset. Finally, to have multiple segments for machine learning, the segments will again be segmentized per duration with a defined threshold.
Compute raw trajectory derivatives
We will need the timelag and steplength between each points. The timelag
will be used for detecting the gaps, and the steplength to find the
static points.
The points with a timelag of 0 have been removed after this step. After
inspection, it seems these are points created by posmo processing of the
data: At each change of transport mode, the point is duplicate with
different coordinates.
Detect gaps in recording
Posmo recording timerate is of 10 seconds, and the gps tracker of 5
sec. We will use a threshold value above these numbers which has been
decided for 30 seconds. We estimated under 30 seconds, the gaps are not
too important to define the derivative (max 3 missing points in posmo
and max. 6 in GPS tracker).
# 1) Compute raw trajectory derivatives
# Speed and steplength
mvmt_data <-
mvmt_data %>%
group_by(source) %>%
mutate(
timelag = as.numeric(difftime(lead(datetime), datetime)),
steplength_m = sqrt((E-lead(E))^2 + (N-lead(N))^2), # Horizontal steplength
steplength_h = H-lead(H), # Add vertical steplength
speed_ms = abs(steplength_m)/timelag, # Horizontal speed
speed_v = abs(steplength_h)/timelag, # Vertical speed
slope = (lead(H) - H)/steplength_m * 100
)
# Turning angle
trackAngle <- function(Ecol, Ncol){
coords <- data.matrix(cbind(Ecol, Ncol))
n <- length(Ecol)
if(n > 2){
angles <- abs(c(trackAzimuth(coords), 0) -
c(0, rev(trackAzimuth(coords[nrow(coords):1, ]))))
# angles <- ifelse(angles > 180, 360 - angles, angles) #[0,180] to [0,360]
angles[is.na(angles)] <- 180
return(c(0, angles[-c(1, length(angles))], 0))
}
else{
return(numeric(n))
}
}
#remove timelag 0 -> recording errors
mvmt_data <- mvmt_data %>% filter(timelag != 0)
# 2) Find gaps in the data
mvmt_data_gap <- mvmt_data |>
ungroup() |>
mutate(gap = timelag > 30)
Segmentation per gap, user and transport mode
A unique segment ID is attributed at each point based on the user,
transport mode, and gaps. Three individual indexes for those are
created. The points are traversed in user and datetime order, and at
each change of user or transport mode, the indexes are increased. For
the gap, the last point before the gap closes the current segment index.
Finally, the three indexes are combined to form unique segments.
Once the segments are created, the total duration could be calculated as well as meansteps at different window sizes. We looked at 5 points before and after the considered points, so we could calculate three mean values of the steplength: one small window (only 1 value before/after), one medium window (3 before/after), and one big window (5 before/after). These different steplength means will be used for the static calculation and to define the characteristics of transport modes.
# Increment an index at each change of value in the vec column.
# function source: GEO880 - Exercise3
rle_id <- function(vec) {
x <- rle(vec)$lengths
as.factor(rep(seq_along(x), times = x))
}
mvmt_data_segmentized <- mvmt_data_gap %>%
mutate(
segment_ID_user = rle_id(source),
segment_ID_gap = 1,
segment_ID_tmode = rle_id(tmode_manual)
)
# Gap: Increment segment_ID_gap for each TRUE
# Takes some time: to optimize
for (i in 2:nrow(mvmt_data_segmentized)) {
if (mvmt_data_segmentized$gap[i-1] == TRUE) {
mvmt_data_segmentized$segment_ID_gap[i] <- mvmt_data_segmentized$segment_ID_gap[i-1] + 1
} else {
mvmt_data_segmentized$segment_ID_gap[i] <- mvmt_data_segmentized$segment_ID_gap[i-1]
}
}
# ADD PRE-FINAL ID for segmentation
mvmt_data_segmentized <- mvmt_data_segmentized %>%
mutate(
segment_ID_both = paste(segment_ID_user, segment_ID_gap, segment_ID_tmode, sep = "-")
)
# Add duration per segment
mvmt_data_segmentized <- mvmt_data_segmentized %>%
group_by(segment_ID_both) %>%
mutate(duration_secs = as.integer(difftime(max(datetime), min(datetime), units = "secs")))
# Calculate distances to previous/next points
mvmt_data_segmentized <- mvmt_data_segmentized |>
group_by(segment_ID_both) %>%
mutate(
nMinus5 = sqrt((lag(E, 5) - E)^2 + (lag(N, 5) - N)^2),
nMinus4 = sqrt((lag(E, 4) - E)^2 + (lag(N, 4) - N)^2), #...
nMinus3 = sqrt((lag(E, 3) - E)^2 + (lag(N, 3) - N)^2), #dist to pos -30/20s
nMinus2 = sqrt((lag(E, 2) - E)^2 + (lag(N, 2) - N)^2), #dist to pos -20/10s
nMinus1 = sqrt((lag(E, 1) - E)^2 + (lag(N, 1) - N)^2), #dist to pos -10/5s
nPlus1 = sqrt((E - lead(E, 1))^2 + (N - lead(N, 1))^2), #dist to pos +10/5s
nPlus2 = sqrt((E - lead(E, 2))^2 + (N - lead(N, 2))^2), #dist to pos +20/10s
nPlus3 = sqrt((E - lead(E, 3))^2 + (N - lead(N, 3))^2), #dist to pos +30/20s
nPlus4 = sqrt((E - lead(E, 4))^2 + (N - lead(N, 4))^2), #...
nPlus5 = sqrt((E - lead(E, 5))^2 + (N - lead(N, 5))^2),
)
# Calculate meanstep at three different windows: small, middle and big
mvmt_data_segmentized <- mvmt_data_segmentized |>
rowwise() |>
mutate(
stepMean_smallw = round(mean(c(nMinus1, nPlus1)),2),
stepMean_midw = round(mean(c(nMinus3, nMinus2, nMinus1, nPlus1, nPlus2, nPlus3)),2),
stepMean_bigw = round(mean(c(nMinus5, nMinus4, nMinus3, nMinus2, nMinus1, nPlus1, nPlus2, nPlus3, nPlus4, nPlus5)),2)
) |>
ungroup()
# Calculate turning angle of each point in each segment
mvmt_data_segmentized <- mvmt_data_segmentized %>%
group_by(segment_ID_both) %>%
mutate(turning_angle = trackAngle(E, N))
# Calculate acceleration for each point in each segment
mvmt_data_segmentized <- mvmt_data_segmentized %>%
group_by(segment_ID_both) %>%
mutate(acceleration_h = (speed_ms - lead(speed_ms))/timelag,
acceleration_v = (speed_v - lead(speed_v)/timelag))
# Remove NA with 0 in acceleration columns
mvmt_data_segmentized <- mvmt_data_segmentized %>%
mutate(
acceleration_h = replace(acceleration_h, is.na(acceleration_h), 0),
acceleration_v = replace(acceleration_v, is.na(acceleration_v), 0),
)
Determine static points, and remove static and small
segments
The static is calculated per segment, by comparing the mean steplength
of the big window to the one from the small window. This method was
chosen because some transport modes have variances in speed (stops in
public transport, or traffic jams, light stops, …). Therefore, a
comparison with the overall segment was not ideal.
Static points with a steplength of less than 5m were also considered as
statics.
Finally, segments with a duration of less than 30 seconds were
removed.
The information about static was still kept in the segments by
calculating a percentage of static, as this could be useful to determine
the mean of transport.
#determine static points: choose an average distance to use as the threshold
mvmt_data_add_static <- mvmt_data_segmentized |>
group_by(segment_ID_both) %>%
mutate(
static =
stepMean_bigw < mean(stepMean_smallw, na.rm=TRUE) |
steplength_m < 0.5,
static = ifelse(is.na(static), FALSE, static)
)
# Add percentage of static in each segment
mvmt_data_add_static <- mvmt_data_add_static |>
group_by(segment_ID_both) %>%
mutate(
static_percent = (sum(static))/(length(static))
# Optimize by only considering continuous static points
)
# Filter static and small segments
mvmt_data_filtered <- mvmt_data_add_static %>%
group_by(segment_ID_both, source, tmode_manual, duration_secs, static, static_percent) %>%
# remove static segments
filter(static == FALSE) %>%
#remove the too small segments (duration < 30 s)
filter(duration_secs > 30)
Final segmentation per duration
In order to have more segments of each transport modes for the machine
learning, the long segments were segmentized based on the duration. The
maximum duration considered was 100 seconds. However, to not create very
small parts, when the last resegmentized part was smaller than 50
seconds, this was added to the penultimate part.
# Segmentize per duration
mvmt_data_split <- mvmt_data_filtered %>%
group_by(segment_ID_both) %>%
mutate(
#remove timelag to next segment
timelag = case_when(timelag > 30 ~ 0,
timelag <= -20 ~ 0,
.default = timelag),
duration_index = case_when(
(duration_secs > 100) & (duration_secs %% 100) >= 50 ~ ceiling(cumsum(timelag)/100),
(duration_secs > 100) & (duration_secs %% 100) < 50 ~ ceiling((cumsum(timelag)/100)-1),
.default = 0
),
segment_ID_final = paste(segment_ID_both, duration_index, sep = "-")
)
# Remove unwanted columns
mvmt_seg_final <- mvmt_data_split %>%
dplyr::select(-c("segment_ID_user", "segment_ID_both", "segment_ID_gap", "duration_index", "segment_ID_tmode", "nMinus5", "nMinus4", "nMinus3", "nMinus2", "nMinus1", "nPlus1", "nPlus2", "nPlus3", "nPlus4", "nPlus5"
))
Here is the summary of the resulting number of segments, ordered from the most to last:
# Count no of segs per means of transports:
mvmt_seg_count <- mvmt_seg_final %>% st_drop_geometry() %>% group_by(segment_ID_final, tmode_manual) %>% summarize() %>% group_by(tmode_manual) %>% count() %>% arrange(desc(n))
knitr::kable(mvmt_seg_count,
caption = "Number of segments per mode of transport")
| tmode_manual | n |
|---|---|
| unclassified | 1349 |
| walk | 1234 |
| car | 813 |
| tram | 526 |
| train | 465 |
| bus | 437 |
| bike | 194 |
| ski | 137 |
| ski_lift | 107 |
| plane | 42 |
| cable_car | 21 |
| boat | 20 |
| t_bar | 18 |
| run | 16 |
| kick-scooter | 13 |
Segments visualisation - example
Here we take an example with both posmo and gps data, at different steps of the analysis.
# MAP VISUALISATIONS
# Final segments per transport modes
visu_data <- mvmt_seg_final %>% filter(as.Date(datetime) == "2023-05-16")
n_tmode <- visu_data$tmode_manual %>% unique() %>% length()
visu_data_wgs <- visu_data %>% st_transform(crs = 4326)
set.seed(2)
factpal <- colorFactor(topo.colors(n_tmode), domain =visu_data_wgs$tmode_manual)
m <- leaflet() %>%
#addTiles() %>% # Add default OpenStreetMap map tiles
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data_wgs,
#opacity = 0.3,
radius = 0.2,
popup = paste("seg_ID: ", visu_data_wgs$segment_ID_final),
color = ~factpal(tmode_manual)) %>%
addLegend(position = 'topright',
#colors = c('red'),
#labels = c('static'),
pal = factpal,
values = visu_data_wgs$tmode_manual,
title = '2023-05-16: Final segments per transport mode')
## Plot the leaflet object m
m
# Gaps
visu_gaps_static <- mvmt_data_add_static %>% filter(as.Date(datetime) == "2023-05-16")
visu_gaps_static |>
ggplot(aes(E, N, color = gap)) +
#geom_path() +
geom_point() +
coord_fixed() +
coord_equal() +
labs(title = "2023-05-16: Identified gaps") +
theme(legend.position = "right")
# Static
visu_gaps_static |>
ggplot(aes(E, N, color = static)) +
#geom_path() +
geom_point() +
coord_fixed() +
coord_equal() +
labs(title = "2023-05-16: Identified static points") +
theme(legend.position = "right")
# Final segments
visu_data |>
ggplot(aes(E, N, color = segment_ID_final)) +
#geom_path() +
geom_point() +
coord_fixed() +
coord_equal() +
labs(title = "2023-05-16: Final segments") +
theme(legend.position = "right")
After assigning unique ID for every segments, we then calculated the following characteristics of points in each trajectory:
We summarized derivatives for each trajectory by calculating mean value, standard deviation, and range of the aforementioned characteristics.
# SUMMARIZE
mvmt_seg_summary <- mvmt_seg_final %>%
group_by(segment_ID_final, static_percent, duration_secs, tmode_manual) %>%
summarize(
# Total segments
#time_duration = duration_secs,
space_duration = sqrt((sum(abs(steplength_m))^2 + (sum(abs(steplength_h))^2))),
stepMean_midw_mean = mean(stepMean_midw, na.rm=TRUE),
# Step length
steplength_mean = mean(steplength_m),
steplength_sd = sd(steplength_m),
# Horizontal speed
speed_h_mean = mean(speed_ms),
speed_h_sd = sd(speed_ms),
speed_h_range = max(speed_ms) - min(speed_ms),
# Vertical speed
speed_v_mean = mean(speed_v),
speed_v_sd = sd(speed_v),
speed_v_range = max(speed_v) - min(speed_v),
# Horizontal Acceleration
a_h_mean = mean(acceleration_h),
a_h_sd = sd(acceleration_h),
a_h_range = max(acceleration_h) - min(acceleration_h),
# Vertical Acceleration
a_v_mean = mean(acceleration_v),
a_v_sd = sd(acceleration_v),
a_v_range = max(acceleration_v) - min(acceleration_v),
# Sinuosity
ta_mean = mean(turning_angle),
ta_sd = sd(turning_angle),
ta_range = max(turning_angle) - min(turning_angle),
# Geometry
lst_geometry = list(geometry))
Each point will be annotated with the information TRUE/FALSE of its
proximity to the following environment elements:
- Railways (train)
- Trams (rails)
- Bus stops
- Highways (“Autobahn” and “Autostrasse”)
- Cable-cars, ski-lifts and t-bars
- Lakes
- Mountainous area (altitude)
The railways for the trains and trams come from map.geoadmin.ch. The
lakes, bus stops, highways and cable-cars & co comes from SwissTLM3D
(swisstopo). Due to the number of water elements, only the biggest lakes
have been considered. For the highways, it was decided to only take this
category of road because this was the most helpful to separate with the
other transport modes. Classical roads, in addition of being all other
the territory, allow other transport modes such as bikes or walking. Bus
networks lines were not available from swisstopo for downloading for
whole Switzerland coverage, therefore the stops points were used.
The distance considered for proximity has been chosen individually for
each transport mode, based on inspection of all the points labelized in
the transport mode. Finally, the altitude simply used the heights of the
points and applies a threshold. The limit of 1200m has been chosen, as
it has been observed that most ski stations in the country are above
this limit.
# Calculate concave hull based on the trajectory points
concave_hull <- concaveman(mvmt_seg_final, concavity = 2, length_threshold=50) %>% st_buffer(500)
# Join the trajectory points with the environment datasets, by using the concave hull on the shp and to limit calculation time. Dissolve and simplification also reduce the calculation time.
# 1) RAILS
rails_shp <- read_sf(here(data_folder, "railways", "rails.shp")) %>% st_zm() %>% st_union() %>% st_as_sf() %>% st_intersection(concave_hull) %>% mutate(environment = "rails") %>% st_simplify(dTolerance = 1)
joined_data <- mvmt_seg_final %>%
st_join(rails_shp, join = st_is_within_distance, 30)
mvmt_seg_final$isClosetoRails <- !is.na(joined_data$environment)
# 2) TRAMS
trams_shp <- read_sf(here(data_folder, "railways", "trams.shp")) %>% st_zm() %>% st_union() %>% st_as_sf() %>% st_intersection(concave_hull) %>% mutate(environment = "trams") %>% st_simplify(dTolerance = 1)
joined_data <- mvmt_seg_final %>%
st_join(trams_shp, join = st_is_within_distance, 20)
mvmt_seg_final$isClosetoTrams <- !is.na(joined_data$environment)
# 3) BUS STOPS
bus_shp <- read_sf(here(data_folder, "stops", "bus_stops.shp")) %>% st_zm() %>% st_union() %>% st_as_sf() %>% st_intersection(concave_hull) %>% mutate(environment = "bus")
joined_data <- mvmt_seg_final %>%
st_join(bus_shp, join = st_is_within_distance, 50)
mvmt_seg_final$isClosetoBus <- !is.na(joined_data$environment)
# 4) HIGHWAYS
# Highways have first been extracted from the whole swissTLM3D roads datasets by selecting "Autobahn" and "Autostrasse". This is not in the script, due to the huges amount of segments.
roads_shp <- read_sf(here(data_folder, "roads", "roads_dissolved.shp")) %>% st_zm() %>% st_intersection(concave_hull) %>% mutate(environment = "cars") %>% st_simplify(dTolerance = 1)
joined_data <- mvmt_seg_final %>%
st_join(roads_shp, join = st_is_within_distance, 20)
mvmt_seg_final$isClosetoHighways <- !is.na(joined_data$environment)
# 5) LAKES
# Larges lakes have been preselected by defining a threshold of surface ("surface">=10000000)
lakes_shp <- read_sf(here(data_folder, "lakes", "large_lakes.shp")) %>% st_zm() %>% st_union() %>% st_as_sf() %>% st_intersection(concave_hull) %>% mutate(environment = "lakes") %>% st_simplify(dTolerance = 5)
joined_data <- mvmt_seg_final %>% st_join(lakes_shp, join = st_is_within_distance, 1)
mvmt_seg_final$isClosetoLakes <- !is.na(joined_data$environment)
# 6) SKI LIFTS AND CABLE CARS
cables_shp <- read_sf(here(data_folder, "ski_lift_cable_car", "skilift_cablecar_raw.shp")) %>% st_zm() %>% st_union() %>% st_as_sf() %>% st_transform(crs=2056) %>% st_intersection(concave_hull) %>% mutate(environment = "cable_cars") %>% st_simplify(dTolerance = 1)
joined_data <- mvmt_seg_final %>%
st_join(cables_shp, join = st_is_within_distance, 10)
mvmt_seg_final$isClosetoCables <- !is.na(joined_data$environment)
# 7) HIGH ALTITUDES
mvmt_seg_final <- mvmt_seg_final %>% mutate(
isHighAltitude = case_when(H >= 1200.0 ~ TRUE,
.default = FALSE)
)
Once the points have been annotated, a proximity for the whole segments was defined. A threshold based on percentage of proximity was defined, again individually for each mode to optimize the correct classification.
# SUMMARIZE ANNOTATIONS
mvmt_seg_annotations_summarized <- mvmt_seg_final %>%
group_by(segment_ID_final, static_percent, duration_secs, tmode_manual) %>%
summarize(
isClosetoLakes = (sum(isClosetoLakes) / n() * 100 ) > 0,
isClosetoRails = (sum(isClosetoRails) / n() * 100 ) > 70.0,
isClosetoBus = (sum(isClosetoBus) / n() * 100 ) > 30.0,
isClosetoTrams = (sum(isClosetoTrams) / n() * 100 ) > 70.0,
isClosetoHighways = (sum(isClosetoHighways) / n() * 100 ) > 80.0,
isClosetoCables = (sum(isClosetoCables) / n() * 100 ) > 80.0,
isHighAltitude = (sum(isHighAltitude) / n() * 100 ) > 80.0
)
Visualisation of the environment datasets:
# Rails
ggplot() +
geom_sf(data = rails_shp) +
labs(title = "Railways - Proximity of the recorded trajectory points") +
geom_point(data = mvmt_seg_final, aes(E, N, color = isClosetoRails), size=0.5, alpha=0.5) +
coord_sf(datum=2056) +
theme_bw()
# Trams
ggplot() +
geom_sf(data = trams_shp) +
labs(title = "Trams - Proximity of the recorded trajectory points") +
geom_point(data = mvmt_seg_final, aes(E, N, color = isClosetoTrams), size=0.5, alpha=0.5) +
coord_sf(datum=2056) +
theme_bw()
# Bus stops
ggplot() +
geom_sf(data = bus_shp, size=0.5) +
labs(title = "Bus stops - Proximity of the recorded trajectory points") +
geom_point(data = mvmt_seg_final, aes(E, N, color = isClosetoBus), size=1, alpha=0.5) +
coord_sf(datum=2056) +
theme_bw()
# Highways
ggplot() +
geom_sf(data = roads_shp) +
labs(title = "Highways - Proximity of the recorded trajectory points") +
geom_point(data = mvmt_seg_final, aes(E, N, color = isClosetoHighways), size=0.5, alpha=0.5) +
coord_sf(datum=2056) +
theme_bw()
# Lakes
ggplot() +
geom_sf(data = lakes_shp) +
labs(title = "Lakes - Proximity of the recorded trajectory points") +
geom_point(data = mvmt_seg_final, aes(E, N, color = isClosetoLakes), size=0.5, alpha=0.5) +
coord_sf(datum=2056) +
theme_bw()
# Ski lifts and cable cars
ggplot() +
geom_sf(data = cables_shp) +
labs(title = "Cable car and ski lifts - Proximity of the recorded trajectory points") +
geom_point(data = mvmt_seg_final, aes(E, N, color = isClosetoCables), size=0.5, alpha=0.5) +
coord_sf(datum=2056) +
theme_bw()
Visualisation - university day
On the map below in Zurich, can be observed trams, bus and walk travel
modes.
# Visu proximity to environment for specific days
# Zurich - trams, bus and walk
visu_data <- mvmt_seg_final %>% filter(as.Date(datetime) == "2023-05-16") %>% st_transform(crs=4326)
# Cut environment data for vizualisation
c_hull <- concaveman(visu_data, concavity = 2) %>% st_buffer(500)
trams_c <- trams_shp %>% st_transform(crs = 4326) %>% st_intersection(c_hull)
bus_c <- bus_shp %>% st_transform(crs = 4326) %>% st_intersection(c_hull) %>% st_cast("POINT")
visu_bus <- visu_data %>% filter(isClosetoBus)
visu_tram <- visu_data %>% filter(isClosetoTrams)
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addPolylines(data = trams_c,
color = "black",
opacity = 0.5) %>%
addCircleMarkers(data = bus_c,
color = "#696969",
radius = 10,
fillOpacity = 0.0) %>%
addCircleMarkers(data = visu_data,
radius = 0.2,
popup = paste(
" tmode: ", visu_data$tmode_manual
),
color = "#DAA520") %>%
addCircleMarkers(data = visu_bus,
opacity = 0.8,
radius = 5,
popup = paste(
" tmode: ", visu_bus$tmode_manual
),
color = "#0000FF") %>%
addCircleMarkers(data = visu_tram,
opacity = 0.8,
radius = 1,
popup = paste(
" tmode: ", visu_tram$tmode_manual
),
color = "#FF0000") %>%
addLegend(position = 'topright',
colors = c("#DAA520", "#0000FF", "#FF0000", "#696969", "black"),
labels = c('All points', "isCloseToBus", "isCloseToTrams", "Circle: bus stops", "Lines: trams"),
title = '2023-05-16: Proximity - Lecture day')
## Plot the leaflet object m
m
Visualisation - ski day
On this map below in Grimentz, Wallis, can be seen the travel modes ski,
cable-car, ski-lift and t-bar.
# Visu proximity to environment for specific days
# Grimentz - ski, ski-lift, cable-car, t-bar
visu_data <- mvmt_seg_final %>% filter(as.Date(datetime) == "2023-04-13") %>% st_transform(crs=4326)
# Cut environment data for vizualisation
c_hull <- concaveman(visu_data, concavity = 2) %>% st_buffer(1000)
cables_c <- cables_shp %>% st_transform(crs = 4326) %>% st_intersection(c_hull)
visu_cables <- visu_data %>% filter(isClosetoCables)
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addPolylines(data = cables_c,
color = "black",
opacity = 0.5) %>%
addCircleMarkers(data = visu_data,
radius = 0.2,
popup = paste(
" tmode: ", visu_data$tmode_manual
),
color = "#DAA520") %>%
addCircleMarkers(data = visu_cables,
opacity = 0.8,
radius = 1,
popup = paste(
" tmode: ", visu_cables$tmode_manual
),
color = "#0000FF") %>%
addLegend(position = 'topright',
colors = c("#DAA520", "#0000FF", "black"),
labels = c('All points', "isClosetoCables", "Lines: cable car, ski-lifts, t-bars"),
title = '2023-04-13: Proximity - ski day')
## Plot the leaflet object m
m
In this part, we performed statistical analysis on each trajectory derivative in order to answer the first research question, which is to see whether movement characteristics differ between different transport modes.
For all the 28 derivatives calculated, we applied pairwise t-test to every two transport modes. And based on the result of t-test, we then could tell if one derivative is significantly different between two transport modes. We then calculated the percentage of significantly different groups among all groups.
According to the following table, the mean percentage of significant groups is 47%, and 15 derivatives have a significant percent more than 50%. The top five derivatives which differs most significantly between transport modes are mean vertical acceleration (a_v_mean), mean vertical speed (speed_v_mean), percentage of static points (static_percent), range of vertical acceleration (a_v_range), standard deviation of vertical acceleration (a_v_sd).
Based on the result, we could say most derivatives we calculated from movement characteristics differ significantly between transport mode. And therefore, we believe using these derivatives to identify travel mode is feasible.
#count of segments:
tmode_count <- mvmt_data %>% group_by(tmode_manual) %>% count() %>% arrange(desc(n))
knitr::kable(tmode_count %>% st_drop_geometry(),
caption = "Number of points labelized per mode of transport")
| tmode_manual | n |
|---|---|
| unclassified | 18306 |
| walk | 18084 |
| tram | 9490 |
| car | 9282 |
| bus | 5938 |
| train | 5012 |
| bike | 2290 |
| ski | 1775 |
| ski_lift | 1148 |
| plane | 465 |
| boat | 283 |
| run | 245 |
| cable_car | 221 |
| t_bar | 182 |
| kick-scooter | 144 |
# extrafont::font_import()
# Clean data for statistics
mvmt_seg_annotations_summarized_value <- mvmt_seg_final %>%
group_by(segment_ID_final, static_percent, duration_secs, tmode_manual) %>%
summarize(
isClosetoLakes = sum(isClosetoLakes) / n() * 100,
isClosetoRails = sum(isClosetoRails) / n() * 100,
isClosetoBus = sum(isClosetoBus) / n() * 100,
isClosetoTrams = sum(isClosetoTrams) / n() * 100,
isClosetoHighways = sum(isClosetoHighways) / n() * 100 ,
isClosetoCables = sum(isClosetoCables) / n() * 100,
isHighAltitude = sum(isHighAltitude) / n() * 100
) %>%
st_drop_geometry()
## `summarise()` has grouped output by 'segment_ID_final', 'static_percent',
## 'duration_secs'. You can override using the `.groups` argument.
# Merge result of derivative and annotation
mvmt_sta <- merge(st_drop_geometry(mvmt_seg_summary),
mvmt_seg_annotations_summarized_value,
by = c('segment_ID_final', 'tmode_manual',
'duration_secs', 'static_percent'))
# Remove 'unclassified' class and 'lst_geometry' column
mvmt_sta <- mvmt_sta %>%
dplyr::select(-c("lst_geometry")) %>%
filter(tmode_manual != "unclassified")
# Remove NA
mvmt_sta <- na.omit(mvmt_sta)
# T test for each derivative
ttesttable <- data.frame(matrix(nrow = 30, ncol = 5))
rownames(ttesttable) <- colnames(mvmt_sta)
colnames(ttesttable) <- c("*", "**", "***", "****", "ns")
ttesttable <- tail(ttesttable, -2)
index <- data.frame(matrix(nrow = 5, ncol = 2))
index[,1] <- c("*", "**", "***", "****", "ns")
index[,2] <- numeric(5)
colnames(index) <- c("p.signif", "n")
pwc <- mvmt_sta %>%
pairwise_t_test(duration_secs ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[1,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(static_percent ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[2,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(space_duration ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[3,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(stepMean_midw_mean ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[4,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(steplength_mean ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[5,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(steplength_sd ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[6,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(speed_h_mean ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[7,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(speed_h_sd ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[8,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(speed_h_range ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[9,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(speed_v_mean ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[10,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(speed_v_sd ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[11,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(speed_v_range ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[12,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(a_h_mean ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[13,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(a_h_sd ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[14,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(a_h_range ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[15,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(a_v_mean ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[16,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(a_v_sd ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[17,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(a_v_range ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[18,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(ta_mean ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[19,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(ta_sd ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[20,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(ta_range ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[21,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(isClosetoLakes ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[22,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(isClosetoRails ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[23,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(isClosetoBus ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[24,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(isClosetoTrams ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[25,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(isClosetoHighways ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[26,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(isClosetoCables ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[27,] <- b$n
pwc <- mvmt_sta %>%
pairwise_t_test(isHighAltitude ~ tmode_manual,
p.adjust.method = "bonferroni")
a <- pwc%>%count(p.signif)
b <- merge(index, a, by = "p.signif", all = TRUE)
b <- replace(b,is.na(b),0)
b$n <- b$n.x + b$n.y
ttesttable[28,] <- b$n
ttesttable$significant_percent <- (ttesttable$`*`
+ ttesttable$`**`
+ ttesttable$`***`
+ ttesttable$`****`)/
(ttesttable$`*` + ttesttable$`**` + ttesttable$`***` +
ttesttable$`****` + ttesttable$ns)
# ttesttable
ttesttable[order(ttesttable$significant_percent,decreasing=TRUE),]
## * ** *** **** ns significant_percent
## a_v_mean 3 2 4 60 22 0.75824176
## speed_v_mean 1 2 3 58 27 0.70329670
## static_percent 5 12 9 35 30 0.67032967
## a_v_range 5 2 2 52 30 0.67032967
## a_v_sd 4 1 4 51 31 0.65934066
## speed_v_sd 4 11 6 38 32 0.64835165
## speed_v_range 5 9 8 37 32 0.64835165
## ta_range 6 7 11 35 32 0.64835165
## ta_sd 5 4 5 41 36 0.60439560
## ta_mean 6 2 4 40 39 0.57142857
## duration_secs 5 3 4 39 40 0.56043956
## isClosetoBus 7 10 2 31 41 0.54945055
## isHighAltitude 1 2 2 45 41 0.54945055
## speed_h_mean 3 2 1 41 44 0.51648352
## isClosetoTrams 6 8 4 28 45 0.50549451
## stepMean_midw_mean 1 1 2 40 47 0.48351648
## isClosetoCables 0 0 0 43 48 0.47252747
## isClosetoRails 4 7 4 24 52 0.42857143
## space_duration 0 7 6 18 60 0.34065934
## steplength_mean 6 9 2 14 60 0.34065934
## isClosetoLakes 0 0 0 25 66 0.27472527
## speed_h_range 6 7 3 6 69 0.24175824
## speed_h_sd 5 8 2 6 70 0.23076923
## a_h_range 7 3 4 6 71 0.21978022
## a_h_sd 7 4 4 3 73 0.19780220
## isClosetoHighways 1 0 0 13 77 0.15384615
## steplength_sd 1 0 3 4 83 0.08791209
## a_h_mean 3 3 1 1 83 0.08791209
#maps per mean of transport?:
mvmt_data_filter <- mvmt_data %>% filter(tmode_manual == "plane") #%>% filter(!is.na(tmode_manual)) #%>% filter(region == "Zurich")
mvmt_data_filter_wgs <- mvmt_data_filter %>% st_transform(crs = 4326)
m <- leaflet() %>%
addTiles() %>% # Add default OpenStreetMap map tiles
addCircleMarkers(data = mvmt_data_filter_wgs,
#opacity = 0.3,
radius = 0.2,
popup = mvmt_data_filter_wgs$datetime,
color = 'blue') %>%
addLegend(position = 'topright',
colors = c('blue'),
labels = c('points'),
title = 'All segments labelized as (selected)')
# Plot the leaflet object m
m
Two different supervised algorithms will be tested:
- K-Nearest Neighbors
- Random forest
We suspect random forest will perform better, as this algorithm is more suitable to categorical data.
Common issues in machine learning: imbalance training sets (not much boat against walk segments)
The segments annotated with trajectory derivatives and environment will be splitted into training and testing parts. We will validate the model by doing a K-fold cross validation.
# Drop geometry
# Merge all variables
mvmt_all <- merge(st_drop_geometry(mvmt_seg_summary),
st_drop_geometry(mvmt_seg_annotations_summarized),
by = c('segment_ID_final', 'tmode_manual',
'duration_secs', 'static_percent'))
# Filter the 'unclassified' records
mvmt_all <- mvmt_all %>% filter(tmode_manual != "unclassified")
# Remove
mvmt_all <- na.omit(mvmt_all)
# Label travel mode
mvmt_all$label <- as.factor(mvmt_all$tmode_manual)
# Convert boolean to 0/1 value
mvmt_all$isClosetoLakes <- as.integer(as.logical(mvmt_all$isClosetoLakes))
mvmt_all$isClosetoRails <- as.integer(as.logical(mvmt_all$isClosetoRails))
mvmt_all$isClosetoBus <- as.integer(as.logical(mvmt_all$isClosetoBus))
mvmt_all$isClosetoTrams <- as.integer(as.logical(mvmt_all$isClosetoTrams))
mvmt_all$isClosetoHighways <- as.integer(as.logical(mvmt_all$isClosetoHighways))
mvmt_all$isClosetoCables <- as.integer(as.logical(mvmt_all$isClosetoCables))
mvmt_all$isHighAltitude <- as.integer(as.logical(mvmt_all$isHighAltitude))
# For machine learning, select factor columns only
# Keep segmentID to for visualisation later, drop after the split in train/test data
mvmt_all <- mvmt_all %>% dplyr::select(c("segment_ID_final", "static_percent", "space_duration",
"stepMean_midw_mean", "steplength_mean",
"steplength_sd",
"speed_h_mean", "speed_h_sd",
"speed_h_range", "speed_v_mean",
"speed_v_sd", "speed_v_range",
"a_h_mean", "a_h_sd", "a_h_range",
"a_v_mean", "a_v_sd", "a_v_range",
"ta_mean","ta_sd", "ta_range",
"isClosetoLakes","isClosetoRails",
"isClosetoBus", "isClosetoTrams",
"isClosetoHighways", "isClosetoCables",
"isHighAltitude", "label"))
# Split into training and testing dataset
split <- sample.split(mvmt_all, SplitRatio = 0.3)
mvmt_all_test_g <- subset(mvmt_all, split==TRUE)
mvmt_all_train_g <- subset(mvmt_all, split==FALSE)
mvmt_all_test <- mvmt_all_test_g %>% dplyr::select(-c("segment_ID_final"))
mvmt_all_train <- mvmt_all_train_g %>% dplyr::select(-c("segment_ID_final"))
# For validation
# for 10-fold cross validation
k_fold <- 10
n3 <- nrow(mvmt_all_train)
n_tail <- n3%/%k_fold
rnd_n <- runif(n3)
rank_n <- rank(rnd_n)
chunk <- (rank_n - 1)%/%n_tail + 1
chunk <- as.factor(chunk)
mvmt_all_train_chunk <- mvmt_all_train
mvmt_all_train_chunk$chunk <- chunk
The classification will also be tried without the environment annotations features to see if how good the classification could still be or not be, with only trajectory derivative.
# Create test/train sets without environment annotation
mvmt_all_test_nprox <- mvmt_all_test %>% dplyr::select(-c("isClosetoLakes", "isClosetoRails", "isClosetoBus","isClosetoTrams", "isClosetoHighways", "isClosetoCables", "isHighAltitude"))
mvmt_all_train_nprox <- mvmt_all_train %>% dplyr::select(-c("isClosetoLakes", "isClosetoRails", "isClosetoBus","isClosetoTrams", "isClosetoHighways", "isClosetoCables", "isHighAltitude"))
# For validation
# for 10-fold cross validation
k_fold <- 10
n3 <- nrow(mvmt_all_train_nprox)
n_tail <- n3%/%k_fold
rnd_n <- runif(n3)
rank_n <- rank(rnd_n)
chunk <- (rank_n - 1)%/%n_tail + 1
chunk <- as.factor(chunk)
mvmt_all_train_nprox_chunk <- mvmt_all_train_nprox
mvmt_all_train_nprox_chunk$chunk <- chunk
Below, is the distribution of the segments within the train and validation sets. Some transport modes do not have a lot of segments (kick-scooter, run, t_bar). The algorithms might have some difficulties with it (imbalance training sets).
# n. segment per tmode in the test set
allset = mvmt_all %>% group_by(label) %>% count()
test_set_count <- mvmt_all_test %>% group_by(label) %>% count()
train_set_count <- mvmt_all_train %>% group_by(label) %>% count()
n_seg_train_test_set <- data.frame(
labels = allset$label,
allset = allset$n,
train_set = train_set_count$n,
test_set = test_set_count$n
)
knitr::kable(n_seg_train_test_set, caption = "Segments in test and training sets")
| labels | allset | train_set | test_set |
|---|---|---|---|
| bike | 188 | 139 | 49 |
| boat | 16 | 11 | 5 |
| bus | 412 | 294 | 118 |
| cable_car | 20 | 14 | 6 |
| car | 790 | 576 | 214 |
| kick-scooter | 13 | 8 | 5 |
| plane | 42 | 30 | 12 |
| run | 15 | 10 | 5 |
| ski | 133 | 95 | 38 |
| ski_lift | 104 | 76 | 28 |
| t_bar | 18 | 14 | 4 |
| train | 430 | 307 | 123 |
| tram | 500 | 363 | 137 |
| walk | 1195 | 870 | 325 |
Knn is non linear supervised classifier. It works by assigning the major class label from the closest labelized neighbours to the unlabelized records.
The main parameter to set for Knn is “k” the number of neighbours considered. Different sets of number will be tested.
# Try kv = 5, 11, 50 and 5 is the best Try kv = 3, 4, 5, 7 and 3 is the best Try kv = 1, 2, 3 and 1 is the best
# Build models with 3 parameter sets
for(m in 1:3){
knn_train_cva <- numeric(0)
knn_test_cva <- numeric(0)
if (m == 1){
kv= 1
}
if (m == 2){
kv= 2
}
if (m == 3){
kv= 3
}
for(i in 1:k_fold){
set.seed(21)
knn_train_pred <- knn(train = dplyr::select(mvmt_all_train_chunk[chunk != i, ], -label, -chunk),
test = dplyr::select(mvmt_all_train, -label),
cl = mvmt_all_train_chunk[chunk != i, ]$label, k=kv)
knn_test_pred <- knn(train = dplyr::select(mvmt_all_train_chunk[chunk != i, ], -label, -chunk),
test = dplyr::select(mvmt_all_test, -label),
cl = mvmt_all_train_chunk[chunk != i, ]$label, k=kv)
knn_train_cva <- rbind(knn_train_cva, Accuracy(knn_train_pred, mvmt_all_train$label))
knn_test_cva <- rbind(knn_test_cva, Accuracy(knn_test_pred, mvmt_all_test$label))
}
#print(knn_train_cva)
#print(knn_test_cva)
if (m == 1){
knn_train_pred1 <- knn_train_pred
knn_test_pred1 <- knn_test_pred
knn_train_cva1 <- knn_train_cva
knn_test_cva1 <- knn_test_cva
knn_train_cva_mean1<-mean(knn_train_cva)
knn_test_cva_mean1<-mean(knn_test_cva)
}
if (m == 2){
knn_train_pred2 <- knn_train_pred
knn_test_pred2 <- knn_test_pred
knn_train_cva2 <- knn_train_cva
knn_test_cva2 <- knn_test_cva
knn_train_cva_mean2<-mean(knn_train_cva)
knn_test_cva_mean2<-mean(knn_test_cva)
}
if (m == 3){
knn_train_pred3 <- knn_train_pred
knn_test_pred3 <- knn_test_pred
knn_train_cva3 <- knn_train_cva
knn_test_cva3 <- knn_test_cva
knn_train_cva_mean3<-mean(knn_train_cva)
knn_test_cva_mean3<-mean(knn_test_cva)
}
}
#plot(mvmt_all$label)
# Create dataframe comparison of the tested models
knn_tests <- c(1, 2, 3)
knn_train_cva_mean <- c(round(knn_train_cva_mean1, 2), round(knn_train_cva_mean2,2), round(knn_train_cva_mean3,2))
knn_test_cva_mean <- c(round(knn_test_cva_mean1,2), round(knn_test_cva_mean2,2), round(knn_test_cva_mean3,2))
res_model_knn <- data.frame(model_no = knn_tests, mean_train = knn_train_cva_mean, mean_test = knn_test_cva_mean)
knitr::kable(res_model_knn, caption = "Knn - models Accuracy on train and test data")
| model_no | mean_train | mean_test |
|---|---|---|
| 1 | 0.97 | 0.69 |
| 2 | 0.84 | 0.69 |
| 3 | 0.82 | 0.69 |
Based on the accuracy of each model for train/test sets, the first model with 3 nearest neighbours seems to perform best both on train and test sets. This model will then be kept for further analysis.
Confusion matrix - knn The confusion matrix below indicates the correct classification of the segments in the tmode categories.
We can see Knn had some confusion between car and train.
# Confusion matrix
print("Confusion matrix - KNN")
## [1] "Confusion matrix - KNN"
cm_knn <- caret::confusionMatrix(knn_test_pred1, mvmt_all_test$label)
print(cm_knn)
## Confusion Matrix and Statistics
##
## Reference
## Prediction bike boat bus cable_car car kick-scooter plane run ski ski_lift
## bike 36 0 6 1 2 1 0 0 10 2
## boat 1 1 1 0 0 0 0 0 0 0
## bus 1 2 50 2 18 0 0 0 10 1
## cable_car 1 0 0 2 0 0 0 0 0 4
## car 0 0 12 0 146 1 0 0 7 0
## kick-scooter 0 0 0 0 0 0 0 0 0 1
## plane 0 0 0 0 0 0 10 0 0 0
## run 0 0 0 0 0 0 0 1 0 0
## ski 3 0 16 0 2 1 0 1 5 0
## ski_lift 4 0 1 1 0 0 0 0 1 20
## t_bar 0 0 0 0 0 0 0 0 0 0
## train 0 1 0 0 37 0 2 0 0 0
## tram 1 1 26 0 9 2 0 0 4 0
## walk 2 0 6 0 0 0 0 3 1 0
## Reference
## Prediction t_bar train tram walk
## bike 1 0 7 2
## boat 0 0 1 1
## bus 0 3 23 2
## cable_car 0 0 0 0
## car 0 52 6 2
## kick-scooter 0 0 1 0
## plane 0 2 0 0
## run 0 0 0 1
## ski 0 0 4 0
## ski_lift 1 0 1 0
## t_bar 1 0 0 0
## train 0 63 0 0
## tram 1 0 87 2
## walk 0 3 7 315
##
## Overall Statistics
##
## Accuracy : 0.6894
## 95% CI : (0.6607, 0.7171)
## No Information Rate : 0.304
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6205
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: bike Class: boat Class: bus Class: cable_car
## Sensitivity 0.73469 0.2000000 0.42373 0.333333
## Specificity 0.96863 0.9962406 0.93481 0.995296
## Pos Pred Value 0.52941 0.2000000 0.44643 0.285714
## Neg Pred Value 0.98701 0.9962406 0.92894 0.996234
## Prevalence 0.04584 0.0046773 0.11038 0.005613
## Detection Rate 0.03368 0.0009355 0.04677 0.001871
## Detection Prevalence 0.06361 0.0046773 0.10477 0.006548
## Balanced Accuracy 0.85166 0.5981203 0.67927 0.664315
## Class: car Class: kick-scooter Class: plane Class: run
## Sensitivity 0.6822 0.000000 0.833333 0.2000000
## Specificity 0.9064 0.998120 0.998108 0.9990602
## Pos Pred Value 0.6460 0.000000 0.833333 0.5000000
## Neg Pred Value 0.9193 0.995314 0.998108 0.9962512
## Prevalence 0.2002 0.004677 0.011225 0.0046773
## Detection Rate 0.1366 0.000000 0.009355 0.0009355
## Detection Prevalence 0.2114 0.001871 0.011225 0.0018709
## Balanced Accuracy 0.7943 0.499060 0.915721 0.5995301
## Class: ski Class: ski_lift Class: t_bar Class: train
## Sensitivity 0.131579 0.71429 0.2500000 0.51220
## Specificity 0.973812 0.99135 1.0000000 0.95772
## Pos Pred Value 0.156250 0.68966 1.0000000 0.61165
## Neg Pred Value 0.968177 0.99231 0.9971910 0.93789
## Prevalence 0.035547 0.02619 0.0037418 0.11506
## Detection Rate 0.004677 0.01871 0.0009355 0.05893
## Detection Prevalence 0.029935 0.02713 0.0009355 0.09635
## Balanced Accuracy 0.552695 0.85282 0.6250000 0.73496
## Class: tram Class: walk
## Sensitivity 0.63504 0.9692
## Specificity 0.95064 0.9704
## Pos Pred Value 0.65414 0.9347
## Neg Pred Value 0.94658 0.9863
## Prevalence 0.12816 0.3040
## Detection Rate 0.08138 0.2947
## Detection Prevalence 0.12442 0.3152
## Balanced Accuracy 0.79284 0.9698
# Models without proximities
# Build models with 3 parameter sets
for(m in 1:3){
knn_train_cva <- numeric(0)
knn_test_cva <- numeric(0)
if (m == 1){
kv= 5
}
if (m == 2){
kv= 11
}
if (m == 3){
kv= 50
}
for(i in 1:k_fold){
set.seed(21)
knn_train_pred <- knn(train = dplyr::select(mvmt_all_train_nprox_chunk[chunk != i, ], -label, -chunk),
test = dplyr::select(mvmt_all_train_nprox, -label),
cl = mvmt_all_train_nprox_chunk[chunk != i, ]$label, k=kv)
knn_test_pred <- knn(train = dplyr::select(mvmt_all_train_nprox_chunk[chunk != i, ], -label, -chunk),
test = dplyr::select(mvmt_all_test_nprox, -label),
cl = mvmt_all_train_nprox_chunk[chunk != i, ]$label, k=kv)
knn_train_cva <- rbind(knn_train_cva, Accuracy(knn_train_pred, mvmt_all_train_nprox_chunk$label))
knn_test_cva <- rbind(knn_test_cva, Accuracy(knn_test_pred, mvmt_all_test_nprox$label))
}
#print(knn_train_cva)
#print(knn_test_cva)
if (m == 1){
knn_train_nprox_pred1 <- knn_train_pred
knn_test_nprox_pred1 <- knn_test_pred
knn_train_nprox_cva1 <- knn_train_cva
knn_test_nprox_cva1 <- knn_test_cva
knn_train_nprox_cva_mean1<-mean(knn_train_cva)
knn_test_nprox_cva_mean1<-mean(knn_test_cva)
}
if (m == 2){
knn_train_nprox_pred2 <- knn_train_pred
knn_test_nprox_pred2 <- knn_test_pred
knn_train_nprox_cva2 <- knn_train_cva
knn_test_nprox_cva2 <- knn_test_cva
knn_train_nprox_cva_mean2<-mean(knn_train_cva)
knn_test_nprox_cva_mean2<-mean(knn_test_cva)
}
if (m == 3){
knn_train_nprox_pred3 <- knn_train_pred
knn_test_nprox_pred3 <- knn_test_pred
knn_train_nprox_cva3 <- knn_train_cva
knn_test_nprox_cva3 <- knn_test_cva
knn_train_nprox_cva_mean3<-mean(knn_train_cva)
knn_test_nprox_cva_mean3<-mean(knn_test_cva)
}
}
#plot(mvmt_all$label)
# Create dataframe comparison of the tested models
knn_tests <- c(1, 2, 3)
knn_train_nprox_cva_mean <- c(round(knn_train_nprox_cva_mean1, 2), round(knn_train_nprox_cva_mean2,2), round(knn_train_nprox_cva_mean3,2))
knn_test_nprox_cva_mean <- c(round(knn_test_nprox_cva_mean1,2), round(knn_test_nprox_cva_mean2,2), round(knn_test_nprox_cva_mean3,2))
res_model_nprox_knn <- data.frame(model_no = knn_tests, mean_train = knn_train_nprox_cva_mean, mean_test = knn_test_nprox_cva_mean)
knitr::kable(res_model_nprox_knn, caption = "Knn - no proximity attributes - models Accuracy on train and test data")
| model_no | mean_train | mean_test |
|---|---|---|
| 1 | 0.80 | 0.71 |
| 2 | 0.76 | 0.71 |
| 3 | 0.70 | 0.67 |
# Confusion matrix
print("Confusion matrix - KNN, no proximity attributes")
## [1] "Confusion matrix - KNN, no proximity attributes"
cm_knn_nprox <- caret::confusionMatrix(knn_test_nprox_pred1, mvmt_all_test$label)
print(cm_knn_nprox)
## Confusion Matrix and Statistics
##
## Reference
## Prediction bike boat bus cable_car car kick-scooter plane run ski ski_lift
## bike 28 1 12 3 3 1 0 0 11 4
## boat 1 0 1 0 0 0 0 0 0 0
## bus 6 2 56 0 14 0 0 0 9 0
## cable_car 2 0 0 1 0 0 0 0 0 2
## car 0 0 15 0 161 0 0 0 5 0
## kick-scooter 0 0 0 0 0 0 0 0 0 0
## plane 0 0 0 0 0 0 10 0 0 0
## run 0 0 0 0 0 0 0 1 0 0
## ski 1 0 11 0 3 0 0 0 2 0
## ski_lift 7 0 0 1 0 0 0 0 1 20
## t_bar 0 2 0 0 0 2 0 0 0 2
## train 0 0 1 0 30 1 2 0 1 0
## tram 1 0 20 0 3 1 0 1 8 0
## walk 3 0 2 1 0 0 0 3 1 0
## Reference
## Prediction t_bar train tram walk
## bike 0 0 5 1
## boat 0 0 1 0
## bus 0 2 20 2
## cable_car 0 0 0 0
## car 0 58 8 1
## kick-scooter 0 0 0 0
## plane 0 2 0 0
## run 0 0 0 2
## ski 0 0 3 0
## ski_lift 1 0 2 0
## t_bar 2 0 0 0
## train 0 58 1 1
## tram 0 1 90 1
## walk 1 2 7 317
##
## Overall Statistics
##
## Accuracy : 0.6978
## 95% CI : (0.6693, 0.7253)
## No Information Rate : 0.304
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6299
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: bike Class: boat Class: bus Class: cable_car
## Sensitivity 0.57143 0.000000 0.47458 0.1666667
## Specificity 0.95980 0.997180 0.94217 0.9962371
## Pos Pred Value 0.40580 0.000000 0.50450 0.2000000
## Neg Pred Value 0.97900 0.995310 0.93528 0.9953008
## Prevalence 0.04584 0.004677 0.11038 0.0056127
## Detection Rate 0.02619 0.000000 0.05239 0.0009355
## Detection Prevalence 0.06455 0.002806 0.10384 0.0046773
## Balanced Accuracy 0.76562 0.498590 0.70837 0.5814519
## Class: car Class: kick-scooter Class: plane Class: run
## Sensitivity 0.7523 0.000000 0.833333 0.2000000
## Specificity 0.8982 1.000000 0.998108 0.9981203
## Pos Pred Value 0.6492 NaN 0.833333 0.3333333
## Neg Pred Value 0.9354 0.995323 0.998108 0.9962477
## Prevalence 0.2002 0.004677 0.011225 0.0046773
## Detection Rate 0.1506 0.000000 0.009355 0.0009355
## Detection Prevalence 0.2320 0.000000 0.011225 0.0028064
## Balanced Accuracy 0.8253 0.500000 0.915721 0.5990602
## Class: ski Class: ski_lift Class: t_bar Class: train
## Sensitivity 0.052632 0.71429 0.500000 0.47154
## Specificity 0.982541 0.98847 0.994366 0.96089
## Pos Pred Value 0.100000 0.62500 0.250000 0.61053
## Neg Pred Value 0.965682 0.99229 0.998115 0.93326
## Prevalence 0.035547 0.02619 0.003742 0.11506
## Detection Rate 0.001871 0.01871 0.001871 0.05426
## Detection Prevalence 0.018709 0.02993 0.007484 0.08887
## Balanced Accuracy 0.517586 0.85138 0.747183 0.71622
## Class: tram Class: walk
## Sensitivity 0.65693 0.9754
## Specificity 0.96137 0.9731
## Pos Pred Value 0.71429 0.9407
## Neg Pred Value 0.95016 0.9891
## Prevalence 0.12816 0.3040
## Detection Rate 0.08419 0.2965
## Detection Prevalence 0.11787 0.3152
## Balanced Accuracy 0.80915 0.9743
Random Forest is a multiple decision tree algorithm.
The main parameters is the number of trees. Three sets of value will be tested.
# Build 3 models with different parameter sets
#20, 40, 80
#40, 60, 80
#80, 100, 120
#80, 90, 100
#75, 80, 85
for(m in 1:3){ # Loop through the different models
if (m == 1){
ntree_value= 75}
if (m == 2){
ntree_value= 80}
if (m == 3){
ntree_value= 85}
rf_train_cva <- numeric(0)
rf_test_cva <- numeric(0)
for(i in 1:k_fold){
set.seed(21)
rf <- randomForest(x = dplyr::select(mvmt_all_train_chunk[chunk != i, ], -label, -chunk),
y = mvmt_all_train_chunk[chunk != i, ]$label,
importance=TRUE, proximity=TRUE, ntree = ntree_value)
rf_train_pred <- predict(rf, dplyr::select(mvmt_all_train, -label), type="class")
rf_test_pred <- predict(rf, dplyr::select(mvmt_all_test, -label), type="class")
rf_train_cva <- rbind(rf_train_cva, Accuracy(rf_train_pred, mvmt_all_train$label))
rf_test_cva <- rbind(rf_test_cva, Accuracy(rf_test_pred, mvmt_all_test$label))
#importance(rf)
#varImpPlot(rf)
}
#print(rf_train_cva)
#print(rf_test_cva)
# Summary recall, precision, F1
#https://blog.revolutionanalytics.com/2016/03/com_class_eval_metrics_r.html#perclass
cm_rf_simple <- ConfusionMatrix(rf_test_pred, mvmt_all_test$label)
n = sum(cm_rf_simple) # n. instances
nc = nrow(cm_rf_simple) # n. classes
diag = diag(cm_rf_simple) # n. correctly classified instances per class
rowsums = apply(cm_rf_simple, 1, sum) # n. instances per class
colsums = apply(cm_rf_simple, 2, sum) # n. predictions per class
p = rowsums / n # distribution of instances over the actual classes
q = colsums / n # distribution of instances over the predicted classes
precision = diag / colsums
recall = diag / rowsums
f1 = 2 * precision * recall / (precision + recall)
print(paste("n_trees: ", ntree_value))
df <- data.frame(precision, recall, f1) %>% as.data.frame()
df %>% replace(is.na(df), 0) %>% summary() %>% print()
# Save prediction and trains
if (m == 1){
rf1 <- rf
rf_train_pred1 <- rf_train_pred
rf_test_pred1 <- rf_test_pred
rf_train_cva1 <- rf_train_cva
rf_test_cva1 <- rf_test_cva
rf_train_cva_mean1<-mean(rf_train_cva)
rf_test_cva_mean1<-mean(rf_test_cva)
}
if (m == 2){
rf2 <- rf
rf_train_pred2 <- rf_train_pred
rf_test_pred2 <- rf_test_pred
rf_train_cva2 <- rf_train_cva
rf_test_cva2 <- rf_test_cva
rf_train_cva_mean2<-mean(rf_train_cva)
rf_test_cva_mean2<-mean(rf_test_cva)
}
if (m == 3){
rf3 <- rf
rf_train_pred3 <- rf_train_pred
rf_test_pred3 <- rf_test_pred
rf_train_cva3 <- rf_train_cva
rf_test_cva3 <- rf_test_cva
rf_train_cva_mean3<-mean(rf_train_cva)
rf_test_cva_mean3<-mean(rf_test_cva)
}}
## [1] "n_trees: 75"
## precision recall f1
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.8220 1st Qu.:0.8166 1st Qu.:0.8026
## Median :0.9336 Median :0.9299 Median :0.9151
## Mean :0.7893 Mean :0.7540 Mean :0.7494
## 3rd Qu.:0.9596 3rd Qu.:0.9847 3rd Qu.:0.9629
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## [1] "n_trees: 80"
## precision recall f1
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.8220 1st Qu.:0.8166 1st Qu.:0.8026
## Median :0.9313 Median :0.9299 Median :0.9151
## Mean :0.7885 Mean :0.7532 Mean :0.7486
## 3rd Qu.:0.9593 3rd Qu.:0.9803 3rd Qu.:0.9598
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## [1] "n_trees: 85"
## precision recall f1
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.8220 1st Qu.:0.8218 1st Qu.:0.8064
## Median :0.9368 Median :0.9431 Median :0.9161
## Mean :0.7894 Mean :0.7560 Mean :0.7503
## 3rd Qu.:0.9583 3rd Qu.:0.9824 3rd Qu.:0.9600
## Max. :1.0000 Max. :1.0000 Max. :1.0000
# Create dataframe comparison of the tested models
# rf_tests <- c(1, 2, 3)
# rf_train_cva_mean <- c(round(rf_train_cva_mean1, 3), round(rf_train_cva_mean2,3), round(rf_train_cva_mean3,3))
# rf_test_cva_mean <- c(round(rf_test_cva_mean1,3), round(rf_test_cva_mean2,3), round(rf_test_cva_mean3,3))
# res_model_rf <- data.frame(model_no = rf_tests, mean_train = rf_train_cva_mean, mean_test = rf_test_cva_mean)
# knitr::kable(res_model_rf, caption = "RF - models Accuracy on train and test data")
# Summary accuracy on test and train sets
rf_tests <- c(1, 2, 3)
rf_train_cva_mean <- c(round(rf_train_cva_mean1, 6), round(rf_train_cva_mean2,6), round(rf_train_cva_mean3,6))
rf_test_cva_mean <- c(round(rf_test_cva_mean1,6), round(rf_test_cva_mean2,6), round(rf_test_cva_mean3,6))
res_model_rf <- data.frame(model_no = rf_tests, mean_train = rf_train_cva_mean, mean_test = rf_test_cva_mean)
knitr::kable(res_model_rf, caption = "RF - models Accuracy on train and test data")
| model_no | mean_train | mean_test |
|---|---|---|
| 1 | 0.993516 | 0.919551 |
| 2 | 0.993267 | 0.918803 |
| 3 | 0.993516 | 0.919925 |
The accuracy of each model for train/test sets tells us that the performances are very similar for the 3 different number of trees. Because the first one has less tree (less overfitting), this model will then be kept for further analysis.
Confusion matrix - RF
We can see RF perform generally better than Knn. There are no obvious confusion of train and car.
print("Confusion matrix - RF")
## [1] "Confusion matrix - RF"
cm_rf <- caret::confusionMatrix(rf_test_pred1, mvmt_all_test$label)
print(cm_rf)
## Confusion Matrix and Statistics
##
## Reference
## Prediction bike boat bus cable_car car kick-scooter plane run ski ski_lift
## bike 46 0 0 0 0 1 0 0 0 0
## boat 0 5 1 0 0 0 0 0 0 0
## bus 1 0 93 0 15 2 0 0 0 0
## cable_car 0 0 0 1 0 0 0 0 0 0
## car 0 0 8 0 193 0 0 0 2 0
## kick-scooter 0 0 0 0 0 0 0 0 0 0
## plane 0 0 0 0 0 0 12 0 0 0
## run 0 0 0 0 0 0 0 0 0 0
## ski 0 0 1 0 1 0 0 0 35 1
## ski_lift 0 0 0 5 0 0 0 0 1 27
## t_bar 0 0 0 0 0 0 0 0 0 0
## train 0 0 0 0 5 0 0 0 0 0
## tram 1 0 13 0 0 0 0 0 0 0
## walk 1 0 2 0 0 2 0 5 0 0
## Reference
## Prediction t_bar train tram walk
## bike 0 0 1 0
## boat 0 0 0 0
## bus 0 0 9 2
## cable_car 0 0 0 0
## car 0 1 0 0
## kick-scooter 0 0 0 1
## plane 0 0 0 0
## run 0 0 0 0
## ski 0 0 0 0
## ski_lift 0 0 0 0
## t_bar 4 0 0 0
## train 0 120 0 0
## tram 0 0 125 1
## walk 0 2 2 321
##
## Overall Statistics
##
## Accuracy : 0.9186
## 95% CI : (0.9006, 0.9343)
## No Information Rate : 0.304
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9007
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: bike Class: boat Class: bus Class: cable_car
## Sensitivity 0.93878 1.000000 0.7881 0.1666667
## Specificity 0.99804 0.999060 0.9695 1.0000000
## Pos Pred Value 0.95833 0.833333 0.7623 1.0000000
## Neg Pred Value 0.99706 1.000000 0.9736 0.9953184
## Prevalence 0.04584 0.004677 0.1104 0.0056127
## Detection Rate 0.04303 0.004677 0.0870 0.0009355
## Detection Prevalence 0.04490 0.005613 0.1141 0.0009355
## Balanced Accuracy 0.96841 0.999530 0.8788 0.5833333
## Class: car Class: kick-scooter Class: plane Class: run
## Sensitivity 0.9019 0.0000000 1.00000 0.000000
## Specificity 0.9871 0.9990602 1.00000 1.000000
## Pos Pred Value 0.9461 0.0000000 1.00000 NaN
## Neg Pred Value 0.9757 0.9953184 1.00000 0.995323
## Prevalence 0.2002 0.0046773 0.01123 0.004677
## Detection Rate 0.1805 0.0000000 0.01123 0.000000
## Detection Prevalence 0.1908 0.0009355 0.01123 0.000000
## Balanced Accuracy 0.9445 0.4995301 1.00000 0.500000
## Class: ski Class: ski_lift Class: t_bar Class: train
## Sensitivity 0.92105 0.96429 1.000000 0.9756
## Specificity 0.99709 0.99424 1.000000 0.9947
## Pos Pred Value 0.92105 0.81818 1.000000 0.9600
## Neg Pred Value 0.99709 0.99903 1.000000 0.9968
## Prevalence 0.03555 0.02619 0.003742 0.1151
## Detection Rate 0.03274 0.02526 0.003742 0.1123
## Detection Prevalence 0.03555 0.03087 0.003742 0.1169
## Balanced Accuracy 0.95907 0.97926 1.000000 0.9852
## Class: tram Class: walk
## Sensitivity 0.9124 0.9877
## Specificity 0.9839 0.9812
## Pos Pred Value 0.8929 0.9582
## Neg Pred Value 0.9871 0.9946
## Prevalence 0.1282 0.3040
## Detection Rate 0.1169 0.3003
## Detection Prevalence 0.1310 0.3134
## Balanced Accuracy 0.9482 0.9844
# Models without proximities
# Build 3 models with different parameter sets
for(m in 1:3){ # Loop through the different models
if (m == 1){
ntree_value= 20}
if (m == 2){
ntree_value= 40}
if (m == 3){
ntree_value= 80}
rf_train_cva <- numeric(0)
rf_test_cva <- numeric(0)
for(i in 1:k_fold){
set.seed(21)
rf <- randomForest(x = dplyr::select(mvmt_all_train_nprox_chunk[chunk != i, ], -label, -chunk),
y = mvmt_all_train_nprox_chunk[chunk != i, ]$label,
importance=TRUE, proximity=TRUE, ntree = ntree_value)
rf_train_pred <- predict(rf, dplyr::select(mvmt_all_train, -label), type="class")
rf_test_pred <- predict(rf, dplyr::select(mvmt_all_test, -label), type="class")
rf_train_cva <- rbind(rf_train_cva, Accuracy(rf_train_pred, mvmt_all_train$label))
rf_test_cva <- rbind(rf_test_cva, Accuracy(rf_test_pred, mvmt_all_test$label))
#importance(rf)
#varImpPlot(rf)
}
#print(rf_train_cva)
#print(rf_test_cva)
if (m == 1){
rf_nprox1 <- rf
rf_train_nprox_pred1 <- rf_train_pred
rf_test_nprox_pred1 <- rf_test_pred
rf_train_nprox_cva1 <- rf_train_cva
rf_test_nprox_cva1 <- rf_test_cva
rf_train_nprox_cva_mean1<-mean(rf_train_cva)
rf_test_nprox_cva_mean1<-mean(rf_test_cva)
}
if (m == 2){
rf_nprox2 <- rf
rf_train_nprox_pred2 <- rf_train_pred
rf_test_nprox_pred2 <- rf_test_pred
rf_train_nprox_cva2 <- rf_train_cva
rf_test_nprox_cva2 <- rf_test_cva
rf_train_nprox_cva_mean2<-mean(rf_train_cva)
rf_test_nprox_cva_mean2<-mean(rf_test_cva)
}
if (m == 3){
rf_nprox3 <- rf
rf_train_nprox_pred3 <- rf_train_pred
rf_test_nprox_pred3 <- rf_test_pred
rf_train_nprox_cva3 <- rf_train_cva
rf_test_nprox_cva3 <- rf_test_cva
rf_train_nprox_cva_mean3<-mean(rf_train_cva)
rf_test_nprox_cva_mean3<-mean(rf_test_cva)
}}
# Create dataframe comparison of the tested models
rf_tests <- c(1, 2, 3)
rf_train_nprox_cva_mean <- c(round(rf_train_nprox_cva_mean1, 3), round(rf_train_nprox_cva_mean2,3), round(rf_train_nprox_cva_mean3,3))
rf_test_nprox_cva_mean <- c(round(rf_test_nprox_cva_mean1,3), round(rf_test_nprox_cva_mean2,3), round(rf_test_nprox_cva_mean3,3))
res_model_nprox_rf <- data.frame(model_no = rf_tests, mean_train = rf_train_nprox_cva_mean, mean_test = rf_test_nprox_cva_mean)
knitr::kable(res_model_nprox_rf, caption = "RF - models Accuracy on train and test data")
| model_no | mean_train | mean_test |
|---|---|---|
| 1 | 0.984 | 0.853 |
| 2 | 0.986 | 0.856 |
| 3 | 0.986 | 0.857 |
Importances of features - RF no proximities
print("Importance of features:")
## [1] "Importance of features:"
importance(rf_nprox1)
## bike boat bus cable_car car
## static_percent 3.928247 0.0000000 4.9614168 -0.6574344 2.0693802
## space_duration 3.358627 1.0259784 3.9255453 0.9288656 4.2761581
## stepMean_midw_mean 7.607332 0.3015799 7.0798111 3.0212581 5.9299948
## steplength_mean 6.209637 1.7135206 4.3596364 0.8719652 6.0724432
## steplength_sd 2.621799 0.0000000 2.9645456 2.3346425 2.2447250
## speed_h_mean 3.294230 -1.0259784 3.5814043 1.5311180 4.1653085
## speed_h_sd 4.066608 1.7735631 4.8200174 1.5410637 2.7211864
## speed_h_range 2.877160 0.0000000 3.2539223 1.6784604 2.6530967
## speed_v_mean 3.107721 1.0259784 3.1189915 2.9853707 4.4279074
## speed_v_sd 2.061638 1.8351117 4.3136647 1.6243656 1.8398953
## speed_v_range 2.625887 0.9622746 4.8867632 -1.0259784 2.6055641
## a_h_mean 3.112322 0.0000000 -0.9109215 0.0000000 1.8764547
## a_h_sd 4.391823 -0.2038059 6.3871082 2.4722609 4.1434325
## a_h_range 2.799586 0.0000000 4.1354405 0.8222935 2.2761537
## a_v_mean 4.255818 1.8635126 5.1273557 3.3811441 6.1647476
## a_v_sd 3.091404 0.6348407 3.4425088 1.0501633 2.2365258
## a_v_range 2.772820 1.7523636 3.5332259 0.5529074 2.1788391
## ta_mean 2.400814 0.0000000 1.5134672 -1.4484736 -2.1319789
## ta_sd 2.324886 1.2863708 3.0802321 0.0000000 -2.9870804
## ta_range 1.349766 1.0259784 1.8403517 1.4064217 0.9110564
## kick-scooter plane run ski ski_lift
## static_percent 0.3342631 1.44468252 1.8351288 2.69559689 1.508911
## space_duration 1.4907120 2.85599405 2.2598118 2.84486704 3.954953
## stepMean_midw_mean 1.4414999 3.88671258 2.5886476 4.08887225 4.045015
## steplength_mean 2.7208324 4.04592315 3.6350484 3.66918611 3.521272
## steplength_sd 0.7488309 0.04409317 1.0259784 3.70483354 1.429160
## speed_h_mean 2.4051369 4.07447981 1.0456563 1.73370206 3.546454
## speed_h_sd 2.0952909 0.58177549 1.8236324 3.13021061 3.697990
## speed_h_range 1.0259784 -1.30173643 1.4414999 1.98207273 0.909050
## speed_v_mean 0.7925594 1.84102790 1.4740926 3.74263886 2.900043
## speed_v_sd 1.6252721 2.76135174 1.0259784 2.85571128 2.000607
## speed_v_range 1.4273829 2.54542484 0.2778850 1.96709512 2.518574
## a_h_mean 0.0000000 0.00000000 1.0259784 0.05435666 1.319641
## a_h_sd 2.6437146 0.29646014 1.9663032 3.18456511 3.762698
## a_h_range 1.7541160 0.00000000 1.0259784 1.95562304 1.911792
## a_v_mean 1.0259784 2.66440309 1.4064217 5.51530398 3.348560
## a_v_sd -0.4740455 2.46558366 -0.4494666 2.65941387 2.894881
## a_v_range 1.3674593 2.16224163 0.0000000 2.55915333 2.917282
## ta_mean 0.0000000 1.87637719 1.0259784 0.20664008 1.877421
## ta_sd 0.0000000 0.00000000 0.0000000 0.69705510 2.334302
## ta_range -1.0259784 0.70445444 -1.0259784 1.72445248 1.362882
## t_bar train tram walk MeanDecreaseAccuracy
## static_percent 2.1159276 2.602153 4.33995367 1.158397 8.343057
## space_duration 4.1060798 4.470413 3.98718988 4.812322 6.503108
## stepMean_midw_mean 4.3729997 6.267447 5.61538660 5.627940 7.800473
## steplength_mean 4.6289519 8.335308 5.55067868 3.699477 7.418592
## steplength_sd 3.3052636 3.414061 2.77166522 1.687548 4.218064
## speed_h_mean 4.0810820 4.636225 3.97574901 5.478526 5.992271
## speed_h_sd 3.0816125 2.715107 4.46509643 5.767640 8.129958
## speed_h_range 2.3538426 2.373172 2.41363557 2.049674 4.346417
## speed_v_mean 2.3846086 1.684064 3.29489522 2.301669 5.946005
## speed_v_sd 1.9011951 1.768435 0.50507643 1.198497 4.629210
## speed_v_range 0.8213658 2.794212 0.07144052 2.023937 6.178278
## a_h_mean 0.0000000 1.735889 1.23558274 4.263996 5.811155
## a_h_sd 3.4577923 1.285068 5.83040767 1.813589 8.347908
## a_h_range 2.3159044 1.754245 3.69583643 2.335584 4.894086
## a_v_mean 2.8567848 1.073983 3.61774194 2.277007 8.150722
## a_v_sd 0.2297182 1.312384 2.04236136 1.256677 4.381567
## a_v_range 1.1902672 1.883062 1.85030524 0.984694 4.444254
## ta_mean 1.4870448 3.449405 2.10986212 2.672247 3.807141
## ta_sd 2.2069609 4.858903 2.57250619 3.099716 5.259880
## ta_range 2.4500727 1.415848 2.38494556 2.637595 5.311247
## MeanDecreaseGini
## static_percent 61.73196
## space_duration 196.58977
## stepMean_midw_mean 344.77162
## steplength_mean 264.35005
## steplength_sd 103.06358
## speed_h_mean 236.24356
## speed_h_sd 107.04253
## speed_h_range 67.41913
## speed_v_mean 75.63247
## speed_v_sd 47.40264
## speed_v_range 48.84578
## a_h_mean 34.18241
## a_h_sd 96.00767
## a_h_range 55.23579
## a_v_mean 107.84383
## a_v_sd 57.18049
## a_v_range 50.49911
## ta_mean 35.48348
## ta_sd 38.48686
## ta_range 39.42865
varImpPlot(rf_nprox1)
Confusion matrix - RF
print("Confusion matrix - RF, no proximity attributes")
## [1] "Confusion matrix - RF, no proximity attributes"
cm_rf_nprox <- caret::confusionMatrix(rf_test_nprox_pred1, mvmt_all_test$label)
print(cm_rf_nprox)
## Confusion Matrix and Statistics
##
## Reference
## Prediction bike boat bus cable_car car kick-scooter plane run ski ski_lift
## bike 44 0 0 0 0 2 0 0 0 0
## boat 0 4 0 0 0 0 0 0 0 0
## bus 2 0 95 0 13 2 0 0 0 0
## cable_car 0 0 0 2 0 0 0 0 0 0
## car 1 0 7 0 174 0 0 0 2 0
## kick-scooter 0 0 0 0 0 0 0 0 0 0
## plane 0 0 0 0 0 0 12 0 0 0
## run 0 0 0 0 0 0 0 0 0 0
## ski 0 0 2 0 2 0 0 0 34 1
## ski_lift 0 0 0 4 0 0 0 0 2 27
## t_bar 0 0 0 0 0 0 0 0 0 0
## train 0 0 0 0 22 0 0 0 0 0
## tram 2 0 12 0 3 0 0 0 0 0
## walk 0 1 2 0 0 1 0 5 0 0
## Reference
## Prediction t_bar train tram walk
## bike 0 0 3 0
## boat 0 0 0 0
## bus 0 0 21 2
## cable_car 0 0 0 0
## car 0 33 2 0
## kick-scooter 0 0 0 1
## plane 0 0 0 0
## run 0 0 0 0
## ski 0 0 0 0
## ski_lift 0 0 0 0
## t_bar 4 0 0 0
## train 0 88 1 0
## tram 0 0 105 1
## walk 0 2 5 321
##
## Overall Statistics
##
## Accuracy : 0.8513
## 95% CI : (0.8285, 0.8721)
## No Information Rate : 0.304
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8182
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: bike Class: boat Class: bus Class: cable_car
## Sensitivity 0.89796 0.800000 0.80508 0.333333
## Specificity 0.99510 1.000000 0.95794 1.000000
## Pos Pred Value 0.89796 1.000000 0.70370 1.000000
## Neg Pred Value 0.99510 0.999061 0.97537 0.996251
## Prevalence 0.04584 0.004677 0.11038 0.005613
## Detection Rate 0.04116 0.003742 0.08887 0.001871
## Detection Prevalence 0.04584 0.003742 0.12629 0.001871
## Balanced Accuracy 0.94653 0.900000 0.88151 0.666667
## Class: car Class: kick-scooter Class: plane Class: run
## Sensitivity 0.8131 0.0000000 1.00000 0.000000
## Specificity 0.9474 0.9990602 1.00000 1.000000
## Pos Pred Value 0.7945 0.0000000 1.00000 NaN
## Neg Pred Value 0.9529 0.9953184 1.00000 0.995323
## Prevalence 0.2002 0.0046773 0.01123 0.004677
## Detection Rate 0.1628 0.0000000 0.01123 0.000000
## Detection Prevalence 0.2049 0.0009355 0.01123 0.000000
## Balanced Accuracy 0.8802 0.4995301 1.00000 0.500000
## Class: ski Class: ski_lift Class: t_bar Class: train
## Sensitivity 0.89474 0.96429 1.000000 0.71545
## Specificity 0.99515 0.99424 1.000000 0.97569
## Pos Pred Value 0.87179 0.81818 1.000000 0.79279
## Neg Pred Value 0.99612 0.99903 1.000000 0.96347
## Prevalence 0.03555 0.02619 0.003742 0.11506
## Detection Rate 0.03181 0.02526 0.003742 0.08232
## Detection Prevalence 0.03648 0.03087 0.003742 0.10384
## Balanced Accuracy 0.94494 0.97926 1.000000 0.84557
## Class: tram Class: walk
## Sensitivity 0.76642 0.9877
## Specificity 0.98069 0.9785
## Pos Pred Value 0.85366 0.9525
## Neg Pred Value 0.96617 0.9945
## Prevalence 0.12816 0.3040
## Detection Rate 0.09822 0.3003
## Detection Prevalence 0.11506 0.3152
## Balanced Accuracy 0.87356 0.9831
library(rpart)
mvmt_train_s_dt_chunk <- mvmt_all_train
mvmt_train_s_dt_chunk$chunk <- chunk
# Build models with 3 parameter sets
for(m in 1:3){
dt_train_cva <- numeric(0)
dt_test_cva <- numeric(0)
for(i in 1:k_fold){
set.seed(21)
if (m == 1){
dt <- rpart(label~.,
data=select(mvmt_train_s_dt_chunk[chunk != i, ], -chunk),
method = "class",
minsplit = 100)
}
if (m == 2){
dt <- rpart(label~.,
data=select(mvmt_train_s_dt_chunk[chunk != i, ], -chunk),
method = "class",
minsplit = 200)
}
if (m == 3){
dt <- rpart(label~.,
data=select(mvmt_train_s_dt_chunk[chunk != i, ], -chunk),
method = "class",
minbucket= 400)
}
dt_train_pred <- predict(dt, select(mvmt_all_train, -label), type="class")
dt_test_pred <- predict(dt, select(mvmt_all_test, -label), type="class")
dt_train_cva <- rbind(dt_train_cva, Accuracy(dt_train_pred, mvmt_all_train$label))
dt_test_cva <- rbind(dt_test_cva, Accuracy(dt_test_pred, mvmt_all_test$label))
}
print(dt_train_cva)
print(dt_test_cva)
if (m == 1){
dt_train_pred1 <- dt_train_pred
dt_test_pred1 <- dt_test_pred
dt_train_cva1 <- dt_train_cva
dt_test_cva1 <- dt_test_cva
dt_train_cva_mean1<-mean(dt_train_cva)
dt_test_cva_mean1<-mean(dt_test_cva)
}
if (m == 2){
dt_train_pred2 <- dt_train_pred
dt_test_pred2 <- dt_test_pred
dt_train_cva2 <- dt_train_cva
dt_test_cva2 <- dt_test_cva
dt_train_cva_mean2<-mean(dt_train_cva)
dt_test_cva_mean2<-mean(dt_test_cva)
}
if (m == 3){
dt_train_pred3 <- dt_train_pred
dt_test_pred3 <- dt_test_pred
dt_train_cva3 <- dt_train_cva
dt_test_cva3 <- dt_test_cva
dt_train_cva_mean3<-mean(dt_train_cva)
dt_test_cva_mean3<-mean(dt_test_cva)
}}
## [,1]
## [1,] 0.8806555
## [2,] 0.8827930
## [3,] 0.8827930
## [4,] 0.8820805
## [5,] 0.8827930
## [6,] 0.8831493
## [7,] 0.8831493
## [8,] 0.8813680
## [9,] 0.8820805
## [10,] 0.8810118
## [,1]
## [1,] 0.8709074
## [2,] 0.8699719
## [3,] 0.8699719
## [4,] 0.8709074
## [5,] 0.8690365
## [6,] 0.8727783
## [7,] 0.8746492
## [8,] 0.8727783
## [9,] 0.8681010
## [10,] 0.8690365
## [,1]
## [1,] 0.8589241
## [2,] 0.8471678
## [3,] 0.8471678
## [4,] 0.8460990
## [5,] 0.8471678
## [6,] 0.8471678
## [7,] 0.8475240
## [8,] 0.8453865
## [9,] 0.8560741
## [10,] 0.8453865
## [,1]
## [1,] 0.8456501
## [2,] 0.8362956
## [3,] 0.8362956
## [4,] 0.8344247
## [5,] 0.8353601
## [6,] 0.8362956
## [7,] 0.8409729
## [8,] 0.8362956
## [9,] 0.8400374
## [10,] 0.8325538
## [,1]
## [1,] 0.6113288
## [2,] 0.6106163
## [3,] 0.6116851
## [4,] 0.6095476
## [5,] 0.6113288
## [6,] 0.6116851
## [7,] 0.6063413
## [8,] 0.6063413
## [9,] 0.6116851
## [10,] 0.6088351
## [,1]
## [1,] 0.5977549
## [2,] 0.5968195
## [3,] 0.5968195
## [4,] 0.5940131
## [5,] 0.5958840
## [6,] 0.5968195
## [7,] 0.5940131
## [8,] 0.5940131
## [9,] 0.5968195
## [10,] 0.5930776
dtrm1 <- dt_train_cva_mean1
dtrm2 <- dt_train_cva_mean2
dtrm3 <- dt_train_cva_mean3
print("Overall accuracy of the 3 train models")
## [1] "Overall accuracy of the 3 train models"
print(c(dtrm1, dtrm2, dtrm3))
## [1] 0.8821874 0.8488066 0.6099394
dtem1 <- dt_test_cva_mean1
dtem2 <- dt_test_cva_mean2
dtem3 <- dt_test_cva_mean3
print("Overall accuracy of the 3 test models")
## [1] "Overall accuracy of the 3 test models"
print(c(dtem1, dtem2, dtem3))
## [1] 0.8708138 0.8374181 0.5956034
dmf1 <- mean(rbind(dt_train_cva1,
F1_Score(dt_train_pred1,
mvmt_all_train$label)))
dmf2 <- mean(rbind(dt_train_cva2,
F1_Score(dt_train_pred2,
mvmt_all_train$label)))
dmf3 <- mean(rbind(dt_train_cva3,
F1_Score(dt_train_pred3,
mvmt_all_train$label)))
print("Mean F1-score of the 3 train models")
## [1] "Mean F1-score of the 3 train models"
print(c(dmf1, dmf2, dmf3))
## [1] 0.8726956 0.8423494 NaN
dmr1 <- mean(rbind(dt_train_cva1,
Recall(dt_train_pred1,
mvmt_all_train$label)))
dmr2 <- mean(rbind(dt_train_cva2,
Recall(dt_train_pred2,
mvmt_all_train$label)))
dmr3 <- mean(rbind(dt_train_cva3,
Recall(dt_train_pred3,
mvmt_all_train$label)))
print("Mean Recall of the 3 train models")
## [1] "Mean Recall of the 3 train models"
print(c(dmr1, dmr2, dmr3))
## [1] 0.8639050 0.8335588 NaN
dmp1 <- mean(rbind(dt_train_cva1,
Precision(dt_train_pred1,
mvmt_all_train$label)))
dmp2 <- mean(rbind(dt_train_cva2,
Precision(dt_train_pred2,
mvmt_all_train$label)))
dmp3 <- mean(rbind(dt_train_cva3,
Precision(dt_train_pred3,
mvmt_all_train$label)))
print("Mean Precision of the 3 train models")
## [1] "Mean Precision of the 3 train models"
print(c(dmp1, dmp2, dmp3))
## [1] 0.8843953 0.8540491 0.5544904
We generated box plots of all transport modes for each trajectory derivative to have a better understanding the how movement characteristics differ between different all transport modes.
In order to show the major distribution of value, for each derivative and each transport mode, we calculated the quantile values corresponding to probabilities of 10% and 90%. And by using the minimum of 10% quantile values and maximum of 90% quantile values as low limit and high limit, we then created the box plot focusing on the major parts of distribution.
Take the box plot of static_percent for example, the groups with the highest static_ _percent are ski, bus, tram and train, which is corresponding to the movement with stops. Take the box plot of space_duration for example, space_duration of plane, train and car have higher distribution than that of other transport modes.
# Boxplot
colourCount = length(unique(mvmt_sta$tmode_manual))
getPalette = colorRampPalette(brewer.pal(9, "Set3"))
# For 28 variables
limtable <- data.frame(matrix(nrow = 30, ncol = 2))
rownames(limtable) <- colnames(mvmt_sta)
limtable <- tail(limtable, -2)
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(duration_secs, 0.1)[[1]],
highlim = quantile(duration_secs, 0.9)[[1]])
limtable[1,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(static_percent, 0.1)[[1]],
highlim = quantile(static_percent, 0.9)[[1]])
limtable[2,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(space_duration, 0.1)[[1]],
highlim = quantile(space_duration, 0.9)[[1]])
limtable[3,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(stepMean_midw_mean, 0.1)[[1]],
highlim = quantile(stepMean_midw_mean, 0.9)[[1]])
limtable[4,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(steplength_mean, 0.1)[[1]],
highlim = quantile(steplength_mean, 0.9)[[1]])
limtable[5,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(steplength_sd, 0.1)[[1]],
highlim = quantile(steplength_sd, 0.9)[[1]])
limtable[6,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(speed_h_mean, 0.1)[[1]],
highlim = quantile(speed_h_mean, 0.9)[[1]])
limtable[7,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(speed_h_sd, 0.1)[[1]],
highlim = quantile(speed_h_sd, 0.9)[[1]])
limtable[8,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(speed_h_range, 0.1)[[1]],
highlim = quantile(speed_h_range, 0.9)[[1]])
limtable[9,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(speed_v_mean, 0.1)[[1]],
highlim = quantile(speed_v_mean, 0.9)[[1]])
limtable[10,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(speed_v_sd, 0.1)[[1]],
highlim = quantile(speed_v_sd, 0.9)[[1]])
limtable[11,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(speed_v_range, 0.1)[[1]],
highlim = quantile(speed_v_range, 0.9)[[1]])
limtable[12,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(a_h_mean, 0.1)[[1]],
highlim = quantile(a_h_mean, 0.9)[[1]])
limtable[13,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(a_h_sd, 0.1)[[1]],
highlim = quantile(a_h_sd, 0.9)[[1]])
limtable[14,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(a_h_range, 0.1)[[1]],
highlim = quantile(a_h_range, 0.9)[[1]])
limtable[15,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(a_v_mean, 0.1)[[1]],
highlim = quantile(a_v_mean, 0.9)[[1]])
limtable[16,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(a_v_sd, 0.1)[[1]],
highlim = quantile(a_v_sd, 0.9)[[1]])
limtable[17,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(a_v_range, 0.1)[[1]],
highlim = quantile(a_v_range, 0.9)[[1]])
limtable[18,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(ta_mean, 0.1)[[1]],
highlim = quantile(ta_mean, 0.9)[[1]])
limtable[19,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(ta_sd, 0.1)[[1]],
highlim = quantile(ta_sd, 0.9)[[1]])
limtable[20,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(ta_range, 0.1)[[1]],
highlim = quantile(ta_range, 0.9)[[1]])
limtable[21,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(isClosetoLakes, 0.1)[[1]],
highlim = quantile(isClosetoLakes, 0.9)[[1]])
limtable[22,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(isClosetoRails, 0.1)[[1]],
highlim = quantile(isClosetoRails, 0.9)[[1]])
limtable[23,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(isClosetoBus, 0.1)[[1]],
highlim = quantile(isClosetoBus, 0.9)[[1]])
limtable[24,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(isClosetoTrams, 0.1)[[1]],
highlim = quantile(isClosetoTrams, 0.9)[[1]])
limtable[25,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(isClosetoHighways, 0.1)[[1]],
highlim = quantile(isClosetoHighways, 0.9)[[1]])
limtable[26,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(isClosetoCables, 0.1)[[1]],
highlim = quantile(isClosetoCables, 0.9)[[1]])
limtable[27,] = c(min(lim0$lowlim), max(lim0$highlim))
lim0 <- mvmt_sta %>% group_by(tmode_manual) %>% summarize(
lowlim = quantile(isHighAltitude, 0.1)[[1]],
highlim = quantile(isHighAltitude, 0.9)[[1]])
limtable[28,] = c(min(lim0$lowlim), max(lim0$highlim))
q <- length(mvmt_sta)
for(i in 3:q){
pl <- ggplot(mvmt_sta, aes(x = tmode_manual, y = mvmt_sta[,i],
fill=tmode_manual)) +
geom_boxplot(alpha=0.7) + #, outlier.shape = NA) +
theme(legend.position="none", plot.title = element_text(hjust = 0.5)) +
scale_fill_manual(values = getPalette(colourCount)) +
ggtitle(paste("Box plot of ", colnames(mvmt_sta)[i])) +
ylab(colnames(mvmt_sta)[i]) +
# coord_cartesian(ylim = c(0, 20))
coord_flip(ylim = c(limtable[i-2,1], limtable[i-2,2])*1.1)
print(pl)
}
It is interesting to analyse the relations between the features. There are some obvious correlations: speed, acceleration and their derivatives, or high altitude correlate with closeness to cables (cable car & ski-lift & t-bar). What is also interesting to notice is the correlation between the proximity to trams and bus (0.4).
# correlation matrix
corr <- mvmt_all %>% dplyr::select(-c("segment_ID_final", "label")) %>% cor(method = "pearson") %>% round(1)
ggcorrplot(corr, hc.order = FALSE, type = "lower",
lab = TRUE)
Summary of the accuracy and kappas of the 4 considered models
The model with the best accuracy and kapp is RF. Removing the environment annotations did not change a lot for Knn but we can see it had an impact on RF.
models <- c("Knn", "Knn no environment", "RF", "RF no environment")
data.frame(models = models,
accuracy = c(round(cm_knn$overall[1],3), round(cm_knn_nprox$overall[1],3), round(cm_rf$overall[1],3), round(cm_rf_nprox$overall[1],3)),
kappa = c(round(cm_knn$overall[2],3), round(cm_knn_nprox$overall[2],3), round(cm_rf$overall[2],3), round(cm_rf_nprox$overall[2],3))
)
## models accuracy kappa
## 1 Knn 0.689 0.620
## 2 Knn no environment 0.698 0.630
## 3 RF 0.919 0.901
## 4 RF no environment 0.851 0.818
Summary of the Precision, Recall and F1
Rf also has better precision, recall and F1 values than Knn.
# Extract precision, recall and F1 from confusion matrix
cm2metrics <- function(cm) {
n = sum(cm) # number of instances
nc = nrow(cm) # number of classes
diag = diag(cm) # number of correctly classified instances per class
rowsums = apply(cm, 1, sum) # number of instances per class
colsums = apply(cm, 2, sum) # number of predictions per class
p = rowsums / n # distribution of instances over the actual classes
q = colsums / n # distribution of instances over the predicted classes
precision = diag / colsums
recall = diag / rowsums
f1 = 2 * precision * recall / (precision + recall)
df <- data.frame(precision, recall, f1) %>% as.data.frame()
df <- df %>% replace(is.na(df), 0)
return(df)
}
# Knn
cm_knn_simple <- ConfusionMatrix(knn_test_pred1, mvmt_all_test$label)
df_knn <- cm2metrics(cm_knn_simple)
# Knn no proximities
cm_knn_np_simple <- ConfusionMatrix(knn_test_nprox_pred1, mvmt_all_test$label)
df_knn_np <- cm2metrics(cm_knn_np_simple)
# RF
cm_rf_simple <- ConfusionMatrix(rf_test_pred1, mvmt_all_test$label)
df_rf <- cm2metrics(cm_rf_simple)
# RF no proximities
cm_rf_np_simple <- ConfusionMatrix(rf_test_nprox_pred1, mvmt_all_test$label)
df_rf_np <- cm2metrics(cm_rf_np_simple)
# Plot result
models <- c("Knn", "Knn no environment", "RF", "RF no environment")
data.frame(models = models,
precision = c(round(mean(df_knn$precision),3), round(mean(df_knn_np$precision),3), round(mean(df_rf_np$precision),3), round(mean(df_rf_np$precision),3)),
recall = c(round(mean(df_knn$recall),3), round(mean(df_knn_np$recall),3), round(mean(df_rf_np$recall),3), round(mean(df_rf_np$recall),3)),
F1 = c(round(mean(df_knn$f1),3), round(mean(df_knn_np$f1),3), round(mean(df_rf_np$f1),3), round(mean(df_rf_np$f1),3))
)
## models precision recall F1
## 1 Knn 0.535 0.473 0.481
## 2 Knn no environment 0.440 0.455 0.441
## 3 RF 0.763 0.713 0.724
## 4 RF no environment 0.763 0.713 0.724
Confusion matrix - Knn
# Knn
print(ConfusionMatrix(knn_test_pred1, mvmt_all_test$label))
## y_pred
## y_true bike boat bus cable_car car kick-scooter plane run ski ski_lift
## bike 36 1 1 1 0 0 0 0 3 4
## boat 0 1 2 0 0 0 0 0 0 0
## bus 6 1 50 0 12 0 0 0 16 1
## cable_car 1 0 2 2 0 0 0 0 0 1
## car 2 0 18 0 146 0 0 0 2 0
## kick-scooter 1 0 0 0 1 0 0 0 1 0
## plane 0 0 0 0 0 0 10 0 0 0
## run 0 0 0 0 0 0 0 1 1 0
## ski 10 0 10 0 7 0 0 0 5 1
## ski_lift 2 0 1 4 0 1 0 0 0 20
## t_bar 1 0 0 0 0 0 0 0 0 1
## train 0 0 3 0 52 0 2 0 0 0
## tram 7 1 23 0 6 1 0 0 4 1
## walk 2 1 2 0 2 0 0 1 0 0
## y_pred
## y_true t_bar train tram walk
## bike 0 0 1 2
## boat 0 1 1 0
## bus 0 0 26 6
## cable_car 0 0 0 0
## car 0 37 9 0
## kick-scooter 0 0 2 0
## plane 0 2 0 0
## run 0 0 0 3
## ski 0 0 4 1
## ski_lift 0 0 0 0
## t_bar 1 0 1 0
## train 0 63 0 3
## tram 0 0 87 7
## walk 0 0 2 315
Confusion matrix - Knn no proximities
# Knn no prox
print(ConfusionMatrix(knn_test_nprox_pred1, mvmt_all_test$label))
## y_pred
## y_true bike boat bus cable_car car kick-scooter plane run ski ski_lift
## bike 28 1 6 2 0 0 0 0 1 7
## boat 1 0 2 0 0 0 0 0 0 0
## bus 12 1 56 0 15 0 0 0 11 0
## cable_car 3 0 0 1 0 0 0 0 0 1
## car 3 0 14 0 161 0 0 0 3 0
## kick-scooter 1 0 0 0 0 0 0 0 0 0
## plane 0 0 0 0 0 0 10 0 0 0
## run 0 0 0 0 0 0 0 1 0 0
## ski 11 0 9 0 5 0 0 0 2 1
## ski_lift 4 0 0 2 0 0 0 0 0 20
## t_bar 0 0 0 0 0 0 0 0 0 1
## train 0 0 2 0 58 0 2 0 0 0
## tram 5 1 20 0 8 0 0 0 3 2
## walk 1 0 2 0 1 0 0 2 0 0
## y_pred
## y_true t_bar train tram walk
## bike 0 0 1 3
## boat 2 0 0 0
## bus 0 1 20 2
## cable_car 0 0 0 1
## car 0 30 3 0
## kick-scooter 2 1 1 0
## plane 0 2 0 0
## run 0 0 1 3
## ski 0 1 8 1
## ski_lift 2 0 0 0
## t_bar 2 0 0 1
## train 0 58 1 2
## tram 0 1 90 7
## walk 0 1 1 317
Confusion matrix - RF
# rf
print(ConfusionMatrix(rf_test_pred1, mvmt_all_test$label))
## y_pred
## y_true bike boat bus cable_car car kick-scooter plane run ski ski_lift
## bike 46 0 1 0 0 0 0 0 0 0
## boat 0 5 0 0 0 0 0 0 0 0
## bus 0 1 93 0 8 0 0 0 1 0
## cable_car 0 0 0 1 0 0 0 0 0 5
## car 0 0 15 0 193 0 0 0 1 0
## kick-scooter 1 0 2 0 0 0 0 0 0 0
## plane 0 0 0 0 0 0 12 0 0 0
## run 0 0 0 0 0 0 0 0 0 0
## ski 0 0 0 0 2 0 0 0 35 1
## ski_lift 0 0 0 0 0 0 0 0 1 27
## t_bar 0 0 0 0 0 0 0 0 0 0
## train 0 0 0 0 1 0 0 0 0 0
## tram 1 0 9 0 0 0 0 0 0 0
## walk 0 0 2 0 0 1 0 0 0 0
## y_pred
## y_true t_bar train tram walk
## bike 0 0 1 1
## boat 0 0 0 0
## bus 0 0 13 2
## cable_car 0 0 0 0
## car 0 5 0 0
## kick-scooter 0 0 0 2
## plane 0 0 0 0
## run 0 0 0 5
## ski 0 0 0 0
## ski_lift 0 0 0 0
## t_bar 4 0 0 0
## train 0 120 0 2
## tram 0 0 125 2
## walk 0 0 1 321
Confusion matrix - RF no proximities
# rf no prox
print(ConfusionMatrix(rf_test_nprox_pred1, mvmt_all_test$label))
## y_pred
## y_true bike boat bus cable_car car kick-scooter plane run ski ski_lift
## bike 44 0 2 0 1 0 0 0 0 0
## boat 0 4 0 0 0 0 0 0 0 0
## bus 0 0 95 0 7 0 0 0 2 0
## cable_car 0 0 0 2 0 0 0 0 0 4
## car 0 0 13 0 174 0 0 0 2 0
## kick-scooter 2 0 2 0 0 0 0 0 0 0
## plane 0 0 0 0 0 0 12 0 0 0
## run 0 0 0 0 0 0 0 0 0 0
## ski 0 0 0 0 2 0 0 0 34 2
## ski_lift 0 0 0 0 0 0 0 0 1 27
## t_bar 0 0 0 0 0 0 0 0 0 0
## train 0 0 0 0 33 0 0 0 0 0
## tram 3 0 21 0 2 0 0 0 0 0
## walk 0 0 2 0 0 1 0 0 0 0
## y_pred
## y_true t_bar train tram walk
## bike 0 0 2 0
## boat 0 0 0 1
## bus 0 0 12 2
## cable_car 0 0 0 0
## car 0 22 3 0
## kick-scooter 0 0 0 1
## plane 0 0 0 0
## run 0 0 0 5
## ski 0 0 0 0
## ski_lift 0 0 0 0
## t_bar 4 0 0 0
## train 0 88 0 2
## tram 0 1 105 5
## walk 0 0 1 321
For Random Forest we can see the most important features. The meanDecreaseAccuracy indicates the influence of the feature on the performance, and the MeanDecreaseGini the influence on the homogenity.
#importance(rf1)
print("Importances of features - RF")
## [1] "Importances of features - RF"
varImpPlot(rf1)
print("Importances of features - RF without ")
## [1] "Importances of features - RF without "
varImpPlot(rf1)
General plots
# Add column predictions
mvmt_all_test_g$predicted_knn <- knn_test_pred1
mvmt_all_test_g$predicted_knn_np <- knn_test_nprox_pred1
mvmt_all_test_g$predicted_rf <- rf_test_pred1
mvmt_all_test_g$predicted_rf_np <- rf_test_nprox_pred1
mvmt_all_test_g <- mvmt_all_test_g %>%
mutate(
correct_knn = predicted_knn == label,
correct_knn_np = predicted_knn_np == label,
correct_rf = predicted_rf == label,
correct_rf_np = predicted_rf_np == label
)
# Reput geometry in the test dataset
#"lst_geometry"
mvmt_all_test_g <- mvmt_all_test_g %>%
merge(
mvmt_seg_summary %>%
dplyr::select(c("segment_ID_final", "geometry")), by = "segment_ID_final"
) %>%
st_as_sf() %>% st_cast("POINT")
coords <- mvmt_all_test_g |> st_coordinates()
mvmt_all_test_g <- mvmt_all_test_g |>
mutate(E = coords[,1], N = coords[,2])
# General Plots
ggplot() +
geom_sf(data = ch_boundaries) +
labs(title = "Knn - Predicted trajectory points") +
geom_path() +
geom_point(data = mvmt_all_test_g, aes(E, N, color = correct_knn), alpha = 0.1) +
coord_sf(datum=2056) +
theme_bw()
ggplot() +
geom_sf(data = ch_boundaries) +
labs(title = "Knn no annotations - Predicted trajectory points") +
geom_path() +
geom_point(data = mvmt_all_test_g, aes(E, N, color = correct_knn_np), alpha = 0.1) +
coord_sf(datum=2056) +
theme_bw()
ggplot() +
geom_sf(data = ch_boundaries) +
labs(title = "RF - Predicted trajectory points") +
#geom_path() +
geom_point(data = mvmt_all_test_g, aes(E, N, color = correct_rf)) +
coord_sf(datum=2056) +
theme_bw()
ggplot() +
geom_sf(data = ch_boundaries) +
labs(title = "RF no annotations - Predicted trajectory points") +
#geom_path() +
geom_point(data = mvmt_all_test_g, aes(E, N, color = correct_rf_np)) +
coord_sf(datum=2056) +
theme_bw()
Comparison on specific regions - Countryside
The plots below show the comparison between Knn and RF in a countryside environment. Transport mode present: car, walk , bus and train. Clicking on the points give you information on the ground truth and predicted value.
We can see that both models fail to identify a section on the top half of the map which was travelled in a bus. Only the part in the village in the North was correctly identified as bus.
We can witness differences in performances between Knn and RF in the south section (train and car). Knn has some confusion between train and car, with or without annotations. Where RF did not have any.
# Visu proximity to environment for specific days
visu_data <- mvmt_all_test_g %>% st_transform(crs=4326) %>%
filter(E > 2537700 & E < 2539620 & N > 1155376 & N < 1167736) %>%
st_transform(crs=4326)
if (visu_data$correct_knn %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_knn))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_knn))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_knn
),
color = ~pal(as.numeric(correct_knn))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "False"),
title = 'Countryside: prediction Knn')
## Plot the leaflet object m
m
if (visu_data$correct_knn_np %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_knn_np))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_knn_np))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_knn_np
),
color = ~pal(as.numeric(correct_knn_np))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "false"),
title = 'Countryside: prediction Knn no annotations')
## Plot the leaflet object m
m
if (visu_data$correct_rf %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_rf))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_rf))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_rf
),
color = ~pal(as.numeric(correct_rf))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "false"),
title = 'Countryside: prediction RF')
## Plot the leaflet object m
m
if (visu_data$correct_rf_np %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_rf_np))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_rf_np))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_rf_np
),
color = ~pal(as.numeric(correct_rf_np))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "false"),
title = 'Countryside: prediction RF no annotations')
## Plot the leaflet object m
m
Comparison on specific regions - City - Yverdon-les-Bains
The plots below show the comparison between Knn and RF in a small city environment. Transport mode present: car, walk, bus and train. Clicking on the points give you information on the ground truth and predicted value.
Knn has again issue with detecting train sections (predict as car) with or without annotations. Also for bus with car, in the area just in front of the station.
For RF, we can see the annotation helped avoiding false prediction of car in bus. With the annotation, the prediction had no error.
# Visu proximity to environment for specific days
visu_data <- mvmt_all_test_g %>% st_transform(crs=4326) %>%
filter(E > 2537773 & E < 2542333 & N > 1180721 & N < 1182101) %>%
st_transform(crs=4326)
if (visu_data$correct_knn %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_knn))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_knn))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_knn
),
color = ~pal(as.numeric(correct_knn))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "False"),
title = 'Yverdon-les-Bains: prediction Knn')
## Plot the leaflet object m
m
if (visu_data$correct_knn_np %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_knn_np))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_knn_np))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_knn_np
),
color = ~pal(as.numeric(correct_knn_np))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "false"),
title = 'Yverdon-les-Bains: prediction Knn no annotations')
## Plot the leaflet object m
m
if (visu_data$correct_rf %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_rf))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_rf))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_rf
),
color = ~pal(as.numeric(correct_rf))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "false"),
title = 'Yverdon-les-Bains: prediction RF')
## Plot the leaflet object m
m
if (visu_data$correct_rf_np %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_rf_np))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_rf_np))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_rf_np
),
color = ~pal(as.numeric(correct_rf_np))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "false"),
title = 'Yverdon-les-Bains: prediction RF no annotations')
## Plot the leaflet object m
m
Comparison on specific regions - City - Zurich
The plots below show the comparison between Knn and RF in a big city environment. Transport mode present: bike, walk, bus, run and tram Clicking on the points give you information on the ground truth and predicted value.
Again, Knn had more struggle than RF. It could not correctly identified the run, some bike and walk.
For RF, there has been some misclassification of run and walk, but less than Knn. Bike was also confused into bus.
# Visu proximity to environment for specific days
visu_data <- mvmt_all_test_g %>% st_transform(crs=4326) %>%
filter(E > 2682935 & E < 2683929 & N > 1249983 & N < 1250708) %>%
st_transform(crs=4326)
if (visu_data$correct_knn %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_knn))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_knn))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_knn
),
color = ~pal(as.numeric(correct_knn))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "False"),
title = 'Irchel: prediction Knn')
## Plot the leaflet object m
m
if (visu_data$correct_knn_np %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_knn_np))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_knn_np))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_knn_np
),
color = ~pal(as.numeric(correct_knn_np))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "false"),
title = 'Irchel: prediction Knn no annotations')
## Plot the leaflet object m
m
if (visu_data$correct_rf %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_rf))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_rf))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_rf
),
color = ~pal(as.numeric(correct_rf))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "false"),
title = 'Irchel: prediction RF')
## Plot the leaflet object m
m
if (visu_data$correct_rf_np %>% as.numeric %>%unique() %>% length() ==1) {
pal <- colorNumeric(
c("#87CEFA"),
domain = as.numeric(visu_data$correct_rf_np))
} else {
pal <- colorNumeric(
c("#FF0000", "#87CEFA"),
domain = as.numeric(visu_data$correct_rf_np))
}
m <- leaflet() %>%
addProviderTiles("Esri.WorldGrayCanvas") %>%
addCircleMarkers(data = visu_data,
opacity = 0.8,
radius = 0.2,
fillOpacity = 0.2,
popup = paste(
" Truth: ", visu_data$label,
". Predicted: ", visu_data$predicted_rf_np
),
color = ~pal(as.numeric(correct_rf_np))) %>%
addLegend(position = 'topright',
colors = c("#87CEFA", "#FF0000"),
labels = c('Correct', "false"),
title = 'Irchel: prediction RF no annotations')
## Plot the leaflet object m
m
Data availability - data attributes availability ( for ex. no height for planes) …
Algorithms tight to the specific area of study DATA LIMITATION
A lot of our trajectories are the same and redundant. We mainly took
the same tram or bus lines (home to university, home to sport, …).
This means our algorithms have been trained on a specific region or
network, which means the algorithm might not work efficiently in other
area. However, this is probably only an issue for public transports
modes such as bus, trams, boat, ski-lift & t-bar & cable-car and
train. Other means such as walk, are not tied to a specific network and
differences between regions are not expected.
Typically countryside bus, tram in Lsne (different network), bus in Lsne or Yverdon.
isCloseToBus -> correlated to isCloseToTram
…
Zeng, J., Yu, Y., Chen, Y., Yang, D., Zhang, L., & Wang, D. (2023). Trajectory-as-a-Sequence: A novel travel mode identification framework. Transportation Research Part C: Emerging Technologies, 146, 103957. doi:10.1016/j.trc.2022.103957
Sadeghian, P., Zhao, X., Golshan, A., & Håkansson, J. (2022). A stepwise methodology for transport mode detection in GPS tracking data. Travel Behaviour and Society, 26, 159–167. doi:10.1016/j.tbs.2021.10.004
Data sources for environment datasets: map.geoadmin.ch